This first block of code installs and loads all the libraries needed
for the analysis. The include=FALSE option in the block
header ensures that the installation commands and loading messages will
not appear in the final HTML report, making it cleaner.
Here, we load the raw count table and gene annotations directly from the GEO repository.
# Load the raw count table for GSE205748 from GEO.
# `urld` is the shared download endpoint; each file is selected by appending
# extra query parameters joined with "&".
urld <- "https://www.ncbi.nlm.nih.gov/geo/download/?format=file&type=rnaseq_counts"
path <- paste(
  urld,
  "acc=GSE205748",
  "file=GSE205748_raw_counts_GRCh38.p13_NCBI.tsv.gz",
  sep = "&"
)
# Read every column as integer, then convert to a matrix whose row names are
# taken from the "GeneID" column. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
gset <- as.matrix(
  data.table::fread(path, header = TRUE, colClasses = "integer"),
  rownames = "GeneID"
)
# Load the gene annotations as a plain data.frame (data.table = FALSE).
# NOTE(review): `urld` already ends in "type=rnaseq_counts", so that parameter
# is repeated in this URL. Presumably harmless; confirm before removing it.
apath <- paste(
  urld,
  "type=rnaseq_counts",
  "file=Human.GRCh38.p13.annot.tsv.gz",
  sep = "&"
)
annot <- data.table::fread(
  apath,
  header = TRUE,
  quote = "",
  stringsAsFactors = FALSE,
  data.table = FALSE
)
# Fetch the GSE205748 series record from GEO
gse <- getGEO(GEO = "GSE205748", GSEMatrix = TRUE, AnnotGPL = TRUE)
## Found 1 file(s)
## GSE205748_series_matrix.txt.gz
## Annotation GPL not available, so will use submitter GPL instead
# Only the first element of the returned object is used from here on
gse <- gse[[1]]
# Sample (phenotype) table, reduced to title, accession and tissue type
pheno_data <- pData(gse)[, c("title", "geo_accession", "tissue type:ch1")]
The genes of interest were selected by text mining and functional enrichment analysis.
# Define genes of interest.
# Character vector of gene symbols, written in upper case. Further down it is
# matched against `annot$Symbol` to pick the annotation rows (and, through
# their GeneIDs, the count-matrix rows) kept for the analysis.
# NOTE(review): the list contains both "COX2" and "PTGS2". In the printed
# count matrix the COX2 row has very high counts (~90k-110k per sample), which
# would be consistent with the symbol resolving to the mitochondrial gene
# MT-CO2 rather than cyclooxygenase-2 (PTGS2). Confirm which gene was intended.
genes_of_interest <- c(
"ABCB11", "ABCD2", "ACE", "ACKR2", "ACP5", "ACP7", "ADAMTS9", "ADCY7", "ADGRL2", "ADIPOQ", "AHR", "AIM2", "AKAP13", "ALB", "ANKRD55", "ANXA1",
"APOL1", "APOL6", "AQP1", "AREG", "ARHGEF3", "ATG16L1", "ATG5", "ATOX1", "ATXN2L", "BEND2", "BGLAP", "BMP2", "BMP7", "BNIP3L", "BRAF", "BTG1",
"C15ORF48", "C7ORF57", "CALD1", "CAPS2", "CARD9", "CASP1", "CASP10", "CAST", "CCL2", "CCL20", "CCL25", "CCL3", "CCL5", "CCND1", "CCND3", "CCR2",
"CCR6", "CCR7", "CCRL2", "CD14", "CD160", "CD19", "CD274", "CD28", "CD36", "CD38", "CD3E", "CD4", "CD40", "CD40LG", "CD52", "CD63", "CD68",
"CD69", "CD80", "CD83", "CD86", "CD8A", "CD8B", "CDC42BPB", "CEBPA", "CEBPG", "CENPK", "CLEC2B", "CLEC4D", "CLIC3", "CMAHP", "CMTM2",
"COL1A1", "COX2", "CRB1", "CREM", "CRP", "CSF2", "CSMD1", "CSN3", "CSNK1A1", "CTLA4", "CTSB", "CTSK", "CUX1", "CX3CR1", "CXCL10", "CXCL13",
"CXCL16", "CXCL2", "CXCL8", "CXCR2", "CYCS", "CYP1A1", "CYP4F22", "DDIT3", "DDX60", "DKK1", "DLAT", "DNAJA2", "DNAJB6", "DUSP4", "DYSF",
"EDNRA", "EFCAB13", "EFCAB7", "EGF", "EGFR", "EGR3", "EIF5B", "ENO1", "EOMES", "ERAP1", "ERAP2", "ERN1", "ERP44", "EZH2", "FAXDC2", "FCGR1A",
"FCGR2A", "FCGR3A", "FGB", "FNBP1", "FOS", "FOSL1", "FOSL2", "FOXP3", "FRZB", "FUT2", "GAPDH", "GATA3", "GBP1", "GBP3", "GBP5", "GEM", "GINS1",
"GJB2", "GJB6", "GLUL", "GOLIM4", "GPR35", "GPT", "GZMA", "GZMB", "GZMK", "HCAR3", "HERC6", "HHAT", "HIF1A", "HK2", "HLA-A", "HLA-B", "HLA-C",
"HLA-DQB1", "HLA-DRB1", "HSPA5", "HSPA6", "HYAL4", "ICAM1", "ICOS", "IFI16", "IFI6", "IFIH1", "IFIT3", "IFNA1", "IFNG", "IFNGR1", "IFNLR1",
"IGF2", "IGF2-AS", "IL10", "IL12B", "IL13", "IL15", "IL17A", "IL17F", "IL17RA", "IL18", "IL1A", "IL1B", "IL1F10", "IL1R1", "IL1RN", "IL2",
"IL21", "IL21-AS1", "IL22", "IL23A", "IL23R", "IL2RA", "IL33", "IL36RN", "IL37", "IL4", "IL5", "IL6", "IL6R", "IL7", "INS", "INS-IGF2", "IRAK1",
"IRS1", "ITGA2B", "ITGAL", "ITGAM", "ITGAX", "JAK2", "JAK3", "JDP2", "JRKL", "JUN", "JUNB", "JUND", "KANK4", "KDM5B", "KIR2DS1", "KIR3DL1",
"KIR3DL2", "KLRB1", "LAMP1", "LAMP2", "LEP", "LILRA5", "LILRB2", "LINC01185", "LINC01250", "LMO7", "LPAL2", "LRRK2", "LTA", "LURAP1L",
"LURAP1L-AS1", "LYZ", "MAF", "MBL2", "MCAM", "MCL1", "MEFV", "MICA", "MIR146A", "MIR21", "MIX23", "MMP1", "MMP3", "MMP7", "MMP9", "MRPS23",
"MSN", "MTHFR", "MUCL1", "MX1", "MYC", "MYNN", "NABP1", "NAMPT", "NCOA7", "NDUFS1", "NFKB1", "NKG7", "NLRP3", "NMI", "NOD2", "NOS2",
"NOXRED1", "NPEPPS", "NRG1", "NT5C3A", "OLR1", "OSMR", "PDCD1", "PDLIM7", "PER1", "PFDN4", "PFDN5", "PFKL", "PGD", "PGK1", "PHEX", "PI3",
"PIK3CD", "PINK1", "PLA2G4D", "PLCG1", "PLG", "PLIN5", "PLS1", "PPARD", "PPARG", "PPARGC1A", "PPARGC1B", "PRDM1", "PRF1", "PRTN3", "PSG2",
"PSMC2", "PSMD7", "PSME2", "PTGS1", "PTGS2", "PTH", "PTPN22", "PTX3", "PYGL", "RAC1", "RBM45", "REL", "RETN", "RGPD6", "RIT1", "RORC", "RPL15",
"RPL36AL", "RPL41", "RPL7", "RPS19", "RPS21", "RPS26", "RPS6KB1", "RPS7", "RSAD2", "RUNX2", "RUNX3", "S100A12", "S100A8", "S100A9", "S100P",
"SAA1", "SAMD9", "SAR1A", "SCN1A", "SEC14L2", "SEC24B", "SELL", "SERPINA1", "SERPINB1", "SERPINE1", "SF3B1", "SF3B3", "SGK1", "SH3BGRL3",
"SIAH1", "SLC1A2", "SLC2A3", "SLC51B", "SLC7A11", "SLC7A5", "SMAD3", "SMARCA4", "SMOX", "SOCS1", "SOD2", "SOST", "SOX4", "SP7", "SPCS3",
"SPON2", "SPP1", "SSR1", "STAT1", "STAT3", "STIM1", "SUOX", "SYT1", "TALDO1", "TBX21", "TEK", "TFPI", "TGFA", "TGFB1", "TGFBR3", "TIMP1",
"TLR2", "TLR3", "TLR4", "TLR9", "TMBIM6", "TMEM45A", "TMPRSS11B", "TNF", "TNFAIP3", "TNFAIP6", "TNFAIP8", "TNFRSF10A", "TNFRSF1A",
"TNFRSF1B", "TNFRSF9", "TNFSF10", "TNFSF11", "TNFSF13B", "TNFSF15", "TNIP1", "TOMM5", "TOMM7", "TPST2", "TPT1", "TRAF2", "TRAF3IP2", "TRAF4",
"TRAF5", "TRBV20OR9-2", "TRIM22", "TRIM69", "TTC39B", "TYK2", "TYMP", "UBE2L3", "UQCR10", "UTP11", "VCAM1", "VDR", "VEGFA", "VIM", "WDR1",
"WNK1", "WWOX", "XAF1", "XBP1", "YOD1", "YWHAB", "ZC3H12A", "ZFP36", "ZMIZ1", "ZNF316", "ZNF415", "ZNF483"
)
# Load the data.table package
library(data.table)
# Keep only the annotation rows for the genes of interest.
# Symbols are compared case-insensitively: `genes_of_interest` is written in
# upper case, and entries such as "C15ORF48" and "C7ORF57" are absent from the
# printed matrix below, presumably because the annotation table spells them in
# mixed case ("C15orf48", "C7orf57"). An exact match would drop them silently.
annot_filtered <- annot[
  toupper(annot$Symbol) %in% toupper(genes_of_interest),
]
# Report requested symbols that still have no annotation row, so a shrinking
# gene set is visible in the report instead of going unnoticed.
genes_not_found <- genes_of_interest[
  !toupper(genes_of_interest) %in% toupper(annot_filtered$Symbol)
]
if (length(genes_not_found) > 0) {
  warning(
    "Genes of interest not found in the annotation table: ",
    paste(genes_not_found, collapse = ", "),
    call. = FALSE
  )
}
# Filter the count matrix by GeneID rather than by symbol (GeneIDs are the row
# names of `gset`). drop = FALSE keeps the result a matrix even if only one
# row matches.
expr_data_filtered <- gset[
  rownames(gset) %in% annot_filtered$GeneID, ,
  drop = FALSE
]
# Keep the Entrez GeneIDs of the retained rows before the row names are
# replaced with symbols.
express <- rownames(expr_data_filtered)
# Map Entrez IDs to gene symbols; when an ID maps to several symbols the first
# one is used (multiVals = "first").
gene_symbols <- mapIds(
  org.Hs.eg.db,
  keys = rownames(expr_data_filtered),
  column = "SYMBOL",
  keytype = "ENTREZID",
  multiVals = "first"
)
## 'select()' returned 1:1 mapping between keys and columns
# Any ID that org.Hs.eg.db cannot resolve comes back as NA; fall back to the
# symbol from the GEO annotation table so no row ends up with an NA name.
unmapped <- is.na(gene_symbols)
if (any(unmapped)) {
  fallback_symbols <- annot_filtered$Symbol[
    match(express, annot_filtered$GeneID)
  ]
  gene_symbols[unmapped] <- fallback_symbols[unmapped]
}
# Replace rownames with gene symbols
rownames(expr_data_filtered) <- gene_symbols
# Print a header line followed by the full filtered count matrix.
# (The header string is Portuguese for "Data frame with row names changed to
# gene symbols:"; it is runtime output and is left unchanged.)
print("Data frame com rownames modificados para símbolos de genes:")
## [1] "Data frame com rownames modificados para símbolos de genes:"
print(expr_data_filtered)
## GSM6222612 GSM6222613 GSM6222614 GSM6222615 GSM6222616 GSM6222617
## TNFRSF9 2 27 10 0 7 7
## ENO1 10652 14830 16786 13529 13523 10798
## PIK3CD 284 505 494 224 350 474
## PGD 1362 1863 2380 2955 3061 2001
## MTHFR 724 1582 1712 580 1122 935
## TNFRSF1B 317 700 634 303 401 663
## PINK1 1716 1914 2019 2979 2458 2495
## IFNLR1 449 385 473 259 586 345
## RUNX3 1015 898 1250 567 946 794
## SH3BGRL3 2089 3566 3529 4353 3158 4566
## CD52 87 272 211 198 123 266
## IFI6 347 367 1070 456 299 939
## ZC3H12A 536 569 679 581 1941 511
## UTP11 542 852 865 825 677 805
## JUN 1526 1918 1944 1239 2230 2325
## KANK4 27 58 85 125 165 100
## EFCAB7 144 268 298 164 200 262
## IL23R 1 1 5 0 3 4
## ADGRL2 550 641 680 1004 1324 896
## GBP3 325 507 360 369 530 447
## GBP1 260 568 649 323 618 884
## GBP5 27 59 76 21 16 107
## TGFBR3 1078 1971 1804 1344 2673 2115
## VCAM1 49 400 279 105 132 223
## PTPN22 32 46 45 35 37 80
## CD160 11 33 51 18 24 18
## FCGR1A 2 9 21 7 6 8
## MCL1 3897 4888 5258 4524 7500 6658
## CTSK 2143 11482 10318 3137 4412 9330
## RORC 961 944 1418 1313 1406 1344
## S100A9 77 430 331 248 5157 323
## S100A12 1 5 5 0 10 10
## S100A8 65 434 388 386 3893 185
## IL6R 434 427 330 288 507 527
## RIT1 435 634 632 594 679 665
## BGLAP 28 64 49 26 21 37
## IFI16 1985 2513 2692 1648 2845 3002
## AIM2 3 16 10 8 13 10
## CRP 0 1 0 0 0 0
## FCGR2A 107 348 209 101 151 291
## HSPA6 271 426 331 168 256 392
## FCGR3A 33 102 59 98 48 102
## SELL 20 56 71 15 24 101
## GLUL 6926 8315 9542 6633 10285 10338
## PTGS2 37 49 50 89 106 56
## CRB1 9 2 4 7 3 6
## KDM5B 1691 1950 2290 1748 3407 2073
## IL10 2 6 2 2 3 3
## YOD1 1011 2294 2456 2773 2129 2525
## HHAT 130 127 232 182 331 240
## TRAF5 270 509 361 249 246 487
## NLRP3 25 38 37 18 50 55
## LINC01250 0 0 4 0 0 1
## RPS7 12403 14409 16815 23213 12717 24182
## RSAD2 35 59 118 54 67 129
## FOSL2 5780 6432 8017 4807 10172 6622
## REL 978 761 780 704 1782 881
## TGFA 512 609 662 1286 1277 940
## DYSF 151 419 491 240 256 355
## HK2 418 723 942 1040 1490 677
## CD8A 38 111 62 48 86 169
## CD8B 26 61 34 32 75 82
## EIF5B 3619 5862 5931 3400 5065 4598
## IL1R1 1832 1908 2038 1587 3054 2597
## RGPD6 626 530 484 518 658 744
## IL1A 17 135 46 56 128 32
## IL1B 7 29 12 49 96 9
## IL37 1340 2783 2964 2765 1874 2715
## IL36RN 1537 2646 2899 3273 2480 3173
## IL1F10 128 158 237 144 127 195
## IL1RN 2209 3189 3008 2643 2377 2946
## NMI 269 490 587 421 519 669
## TNFAIP6 25 150 150 30 76 135
## IFIH1 287 522 500 364 553 337
## SCN1A 1 1 2 5 5 4
## ABCB11 11 44 44 82 122 33
## RBM45 144 207 281 198 231 263
## FRZB 416 646 1027 781 546 890
## TFPI 278 732 624 306 381 632
## STAT1 1429 1638 1976 1343 2470 2558
## NABP1 165 349 378 448 340 343
## SF3B1 6174 7041 7886 6575 8777 8635
## CASP10 310 455 630 280 414 517
## CD28 4 27 12 6 24 47
## CTLA4 4 29 11 6 4 8
## ICOS 1 21 2 2 2 12
## NDUFS1 1476 1844 2161 2115 3360 1951
## CXCR2 84 172 138 96 176 137
## IRS1 256 735 601 580 800 800
## CCL20 0 38 34 19 18 3
## ATG16L1 859 856 1089 768 1256 913
## GPR35 48 68 126 18 65 70
## PDCD1 5 14 11 1 4 23
## PPARG 51 208 197 826 534 312
## RPL15 21497 29981 31081 38260 25740 45023
## EOMES 3 16 8 5 8 14
## CX3CR1 71 119 134 59 87 120
## ACKR2 21 68 50 34 61 48
## CCR2 33 71 88 60 45 95
## CCRL2 7 23 20 12 3 18
## TLR9 17 28 32 14 13 18
## ARHGEF3 483 641 740 536 925 1039
## ADAMTS9 102 311 218 191 206 257
## TMEM45A 10685 21584 18471 20104 18720 15315
## CD80 2 3 1 0 1 1
## CD86 61 111 130 69 101 153
## MIX23 169 337 363 315 215 345
## PLS1 88 99 135 95 199 150
## PTX3 15 43 36 11 12 49
## GOLIM4 486 892 944 587 1016 1077
## MYNN 444 544 575 557 734 658
## TNFSF10 965 1436 1636 1155 1505 2040
## ADIPOQ 17 417 157 546 47 1233
## SPON2 607 1285 1356 1162 926 2071
## S100P 252 89 431 370 122 151
## WDR1 4297 7813 7154 5565 6405 6187
## CD38 2 10 10 6 0 11
## PPARGC1A 376 188 337 499 491 312
## TMPRSS11B 0 0 0 0 0 0
## CSN3 0 0 1 0 0 0
## ALB 14 9 16 6 8 16
## CXCL8 3 5 6 2 8 5
## CXCL2 12 14 13 8 2 5
## AREG 101 81 72 152 155 71
## CXCL10 5 36 57 5 0 77
## CXCL13 2 2 0 0 9 1
## SPP1 1 2 6 7 7 7
## HERC6 137 177 310 124 285 282
## NFKB1 1228 1676 1620 1207 2149 1699
## SEC24B 914 1169 1313 1082 1515 1356
## EGF 49 80 77 87 63 139
## IL2 0 2 0 1 0 2
## IL21 0 0 0 0 0 0
## IL21-AS1 0 0 0 0 1 0
## SLC7A11 60 154 125 47 163 115
## IL15 33 135 118 85 45 149
## EDNRA 173 413 402 337 263 569
## TLR2 102 139 169 200 458 203
## FGB 0 0 0 0 0 1
## DDX60 661 617 769 454 1059 814
## SPCS3 1440 1849 1933 2314 2433 2693
## TLR3 98 104 107 94 152 207
## OSMR 562 737 739 583 922 697
## GZMK 14 57 35 5 24 41
## GZMA 12 48 83 18 20 96
## ANKRD55 7 5 3 6 4 3
## CENPK 56 153 161 114 114 145
## CAST 7657 8059 8664 8059 11764 9845
## ERAP1 2227 2627 2237 1506 2924 2018
## ERAP2 140 325 781 168 255 854
## TNFAIP8 592 1133 1156 1113 1135 1462
## CSF2 1 0 0 2 0 3
## IL5 0 4 1 0 1 3
## IL13 0 4 1 0 0 0
## IL4 3 5 4 1 1 3
## CD14 236 1028 848 288 388 807
## CSNK1A1 6439 9369 9762 8725 12383 10761
## PPARGC1B 275 299 318 302 770 256
## TNIP1 1611 2682 2651 2689 2791 3532
## ATOX1 300 460 537 721 430 631
## FAXDC2 781 1800 1761 3309 3067 1444
## IL12B 3 2 1 1 6 1
## MIR146A 2 3 0 2 1 1
## PDLIM7 739 1980 1505 1405 740 1311
## SERPINB1 602 826 885 652 735 1247
## SSR1 1961 3147 3473 2409 3573 3278
## CD83 105 260 208 147 196 206
## SOX4 1401 1741 1788 1136 1311 2054
## CMAHP 515 976 1471 728 711 1030
## HLA-A 16390 19941 26976 8603 21863 24629
## HLA-C 15998 17888 30681 16498 14203 22005
## HLA-B 23266 30506 41190 16353 31062 36087
## MICA 267 675 313 368 332 510
## LTA 14 19 22 4 18 9
## TNF 59 112 83 57 57 77
## HLA-DRB1 2595 3529 4884 3470 2545 6918
## HLA-DQB1 642 1137 1969 1580 947 2522
## PPARD 1049 1791 2129 1499 1924 1470
## CCND3 942 1028 1293 941 1150 1089
## VEGFA 967 1072 1396 890 1053 821
## RUNX2 119 159 147 85 147 174
## IL17A 0 0 0 0 2 0
## IL17F 0 0 0 0 7 0
## PRDM1 537 1034 1020 1168 1182 825
## ATG5 375 705 748 760 648 988
## TRAF3IP2 917 1361 1406 1100 1352 1547
## NCOA7 361 547 445 586 739 785
## SGK1 1565 2169 3636 2217 1912 2680
## IFNGR1 1516 2870 2904 1854 2439 2603
## TNFAIP3 511 863 867 584 1211 1178
## SOD2 2289 4500 3961 3418 4414 3782
## LPAL2 7 30 7 16 10 15
## PLG 5 12 6 6 5 3
## CCR6 38 121 90 68 155 152
## RAC1 6049 12161 11758 7544 8667 9676
## ZNF316 1654 3285 4517 1393 2196 2255
## AHR 1398 2252 2135 2191 3470 3029
## IL6 3 6 2 2 0 8
## TOMM7 3531 3634 4691 5514 3022 7544
## CYCS 1488 2983 3267 3585 2817 2688
## AQP1 5450 10163 9168 6306 6375 11991
## NT5C3A 593 1032 1386 1540 1104 1158
## EGFR 6153 5020 6337 5107 11485 7981
## CD36 435 1856 1553 1559 1407 2148
## SAMD9 106 128 136 179 313 240
## SERPINE1 73 177 158 86 142 92
## CUX1 1393 1771 2134 1294 2659 2268
## PSMC2 1659 2762 3176 2476 2419 2617
## NAMPT 1078 1491 1570 1572 2462 1468
## HYAL4 33 10 72 20 40 7
## LEP 7 134 18 217 2 444
## CALD1 4068 7827 4959 4136 3886 5766
## BRAF 738 832 901 892 1103 1125
## EZH2 462 677 912 655 695 709
## DNAJB6 2665 4612 5033 3998 4649 4326
## CSMD1 72 93 8 35 23 28
## CTSB 4344 11677 9495 10596 11015 9114
## EGR3 1600 2577 2564 2735 3526 1942
## TNFRSF10A 131 111 143 95 179 154
## BNIP3L 2339 4157 4438 6071 4658 4634
## DUSP4 443 399 343 1118 751 336
## NRG1 279 245 331 172 344 241
## RPL7 23865 29483 30936 39818 26781 45705
## IL7 96 222 271 215 269 330
## GEM 119 430 439 234 291 440
## MYC 1007 1784 2958 1603 2769 2171
## GPT 483 947 1063 2011 1572 708
## JAK2 724 634 638 492 979 757
## CD274 16 49 15 13 27 49
## IL33 1053 1846 1451 1170 1427 1781
## LURAP1L-AS1 2 3 9 4 7 3
## LURAP1L 275 489 551 367 262 558
## TTC39B 753 1117 1319 2583 2510 1237
## IFNA1 0 0 0 0 0 0
## TEK 146 276 251 176 204 314
## TRBV20OR9-2 0 0 0 0 2 0
## TOMM5 448 1011 1034 1272 702 1144
## ANXA1 5102 9731 11536 8457 7094 10526
## ERP44 784 1502 1767 1152 1388 1459
## ZNF483 224 197 249 158 110 298
## TNFSF15 21 30 18 17 43 16
## TLR4 104 282 281 163 215 478
## PTGS1 6519 6887 7433 4029 8774 6559
## HSPA5 3746 7806 8964 4431 9110 5080
## FNBP1 1310 2378 2775 1594 2022 2452
## CARD9 100 236 214 101 150 222
## TRAF2 268 587 455 354 405 464
## CLIC3 1259 1814 1747 1664 1372 2075
## IL2RA 8 33 5 5 16 34
## GATA3 5874 9882 11013 4583 7590 8040
## VIM 18269 36935 28686 23620 20566 47978
## CREM 155 352 352 316 273 373
## DKK1 16 23 28 27 25 20
## MBL2 0 0 0 0 0 0
## SAR1A 1796 2423 2599 2078 2337 2866
## PRF1 17 74 58 18 24 72
## ZMIZ1 1807 3367 3689 1718 3932 2542
## IFIT3 241 477 533 288 308 620
## TALDO1 1983 3126 3398 4633 3613 3403
## IGF2 252 315 404 256 331 499
## INS-IGF2 229 304 378 255 293 478
## IGF2-AS 3 1 2 1 2 1
## INS 0 0 0 0 0 0
## STIM1 2224 3008 3673 1334 3360 2629
## TRIM22 434 945 954 402 796 1123
## PTH 0 0 0 0 0 0
## SAA1 3 34 73 570 162 315
## SLC1A2 50 61 35 51 85 70
## FOSL1 26 19 41 44 34 47
## CCND1 6066 12188 12544 5333 10977 11583
## JRKL 238 314 239 216 345 257
## MMP7 715 118 912 210 149 326
## MMP1 6 0 4 1 4 2
## MMP3 15 1 6 6 4 2
## CASP1 591 1249 1315 1043 1062 1536
## DLAT 562 686 821 863 1139 795
## IL18 2542 3117 3084 3509 2853 3467
## CD3E 41 167 79 38 83 179
## MCAM 1176 3184 2338 2243 1632 2121
## WNK1 5021 4979 5457 4006 10570 6039
## TNFRSF1A 2526 3104 3658 2510 3670 3717
## GAPDH 20302 25743 31808 35503 22529 30124
## CD4 330 854 850 257 421 780
## SLC2A3 165 234 219 119 195 282
## CLEC4D 1 3 0 0 0 0
## KLRB1 6 28 40 16 10 50
## CD69 5 60 39 26 46 104
## CLEC2B 1143 1569 2368 1792 1718 2601
## OLR1 0 3 1 3 6 0
## ABCD2 10 35 32 36 14 91
## LRRK2 357 332 352 233 750 516
## VDR 1130 1617 1893 1529 2380 1690
## TMBIM6 9051 12982 14532 16482 18086 13529
## PFDN5 3197 4740 5065 6694 3655 7516
## SP7 2 0 3 5 0 1
## MUCL1 2549 4340 6798 9638 2343 5597
## CD63 4955 11359 10139 9141 5549 12889
## SUOX 2304 3391 3708 2546 3384 2862
## RPS26 517 926 1055 1117 2980 5144
## RPL41 7939 10874 12532 15880 8644 17038
## IL23A 13 16 35 19 8 15
## DDIT3 301 382 435 348 238 454
## IFNG 0 1 3 0 1 0
## IL22 0 0 0 0 0 0
## LYZ 531 1091 995 1268 654 2287
## CAPS2 67 55 71 84 75 111
## SYT1 3 43 44 37 20 21
## BTG1 7162 10685 12504 7694 8283 10888
## HCAR3 198 245 487 364 263 479
## GJB2 1349 1380 3749 2818 2806 2128
## GJB6 1029 887 3263 2184 1349 2338
## TNFSF11 0 12 7 22 15 19
## TPT1 51220 70418 78301 115159 57482 116850
## LMO7 1303 1354 1523 1372 1895 1528
## TNFSF13B 68 171 154 97 69 241
## LAMP1 5840 10475 13266 5663 9596 8068
## PSME2 832 1411 1653 1412 1044 1684
## GZMB 3 7 12 3 7 34
## RPL36AL 2505 4263 4613 4166 2787 6001
## PYGL 1228 1485 1353 1093 1665 1129
## HIF1A 1322 1051 1401 1271 1832 1540
## FOS 986 1366 4585 1189 3074 5074
## JDP2 711 1054 1216 719 1143 1563
## NOXRED1 37 35 38 29 53 80
## SERPINA1 21 74 267 42 54 128
## CDC42BPB 2309 3761 4232 1678 4605 2795
## PLA2G4D 294 734 1161 372 1285 588
## TRIM69 13 22 42 25 26 21
## SLC51B 14 31 13 17 14 28
## SMAD3 1185 1781 1988 1019 1595 1626
## CYP1A1 20 62 49 37 11 41
## AKAP13 2497 2896 3052 1949 4454 3359
## MEFV 2 21 17 6 6 7
## SOCS1 50 96 89 109 75 185
## ATXN2L 1677 3708 4833 1668 2827 2343
## CD19 2 10 11 6 10 2
## ITGAL 38 199 151 53 118 216
## ITGAM 70 324 277 86 137 299
## ITGAX 73 255 215 90 72 223
## DNAJA2 1570 2198 2392 2084 2276 2321
## SIAH1 746 1429 1500 964 904 1281
## ADCY7 808 1022 1154 714 1597 1236
## NOD2 418 736 576 422 558 527
## CMTM2 4 7 7 7 6 6
## SF3B3 2215 3254 3387 2211 4454 3266
## PSMD7 1877 2940 3149 2906 2766 2993
## WWOX 242 204 223 176 182 240
## MAF 6116 9500 10961 4472 8032 9876
## SLC7A5 671 1313 1837 1497 2438 874
## CXCL16 630 1178 1027 879 822 894
## XAF1 420 604 1293 409 536 1001
## CD68 289 1025 869 383 467 1052
## PER1 3195 7686 13116 2205 5150 7410
## NOS2 4 2 5 1 0 18
## TRAF4 526 543 846 826 548 583
## CCL2 206 215 217 183 281 289
## CCL5 55 127 145 44 73 195
## CCL3 0 8 6 4 1 10
## CCR7 19 35 37 20 45 20
## STAT3 3976 5694 6006 3376 9020 4567
## SOST 6 1 1 17 1 0
## ITGA2B 15 32 26 11 18 15
## EFCAB13 72 115 152 113 80 193
## NPEPPS 2232 2758 2752 2362 3478 2584
## TBX21 1 7 8 0 1 18
## COL1A1 32545 36338 49051 11365 52517 28945
## MRPS23 469 742 834 776 615 873
## MIR21 0 0 3 1 1 0
## RPS6KB1 706 829 875 820 1139 1031
## ACE 263 630 597 189 410 553
## ERN1 718 712 705 521 1062 753
## PRTN3 0 2 1 2 0 2
## PLIN5 38 217 504 2167 1792 116
## RETN 2 1 2 5 5 5
## CCL25 0 0 3 0 3 1
## ICAM1 223 392 467 149 252 369
## TYK2 1577 2964 4026 1596 2187 2165
## SMARCA4 2562 3670 4054 2169 3800 2862
## ACP5 461 707 722 965 837 757
## JUNB 2991 2811 3293 3566 3475 3221
## CYP4F22 1334 2253 2587 2440 2434 1437
## JAK3 76 205 224 76 133 151
## JUND 3676 3893 5821 5721 5118 7930
## CEBPA 4803 6730 8604 8823 8772 8665
## CEBPG 1638 1832 2119 1937 2525 2465
## ACP7 144 902 738 731 676 376
## ZFP36 1243 2157 3020 1596 2380 2467
## TGFB1 931 1246 1207 936 998 1575
## RPS19 18927 24164 25096 31759 19408 36466
## PSG2 0 4 0 5 5 5
## FUT2 152 109 164 83 105 115
## NKG7 16 36 54 24 30 72
## ZNF415 133 199 131 221 212 180
## LILRB2 36 106 91 50 31 65
## LILRA5 0 1 4 4 2 12
## KIR3DL1 0 0 0 1 0 1
## KIR3DL2 0 1 0 1 0 0
## SMOX 199 507 390 622 559 502
## BMP2 324 631 981 400 737 848
## GINS1 96 210 274 130 215 158
## PLCG1 1814 3004 3114 1661 2638 2350
## YWHAB 6612 10179 10036 8220 9821 9703
## PI3 87 173 183 75 825 86
## MMP9 51 199 133 49 40 123
## CD40 282 599 582 377 388 635
## PFDN4 182 385 409 456 309 488
## BMP7 935 1376 1815 1364 1468 1446
## RPS21 7083 10639 12933 14725 6686 17122
## MX1 433 414 1019 311 471 723
## PFKL 3493 6404 7613 5106 5817 5185
## IL17RA 705 1136 1435 554 1220 869
## UBE2L3 1287 2293 2260 2416 2161 2304
## TPST2 506 1036 1134 785 980 1187
## XBP1 2024 3440 3905 4018 3280 3255
## UQCR10 587 1083 1214 1674 812 1192
## SEC14L2 185 301 491 446 357 346
## APOL6 310 620 507 312 558 663
## APOL1 154 291 445 129 187 381
## TYMP 542 1346 1118 1384 1128 1278
## BEND2 0 0 0 0 3 1
## PHEX 12 17 16 19 18 21
## TIMP1 636 1840 1760 919 687 1728
## FOXP3 63 120 97 65 99 56
## MSN 4547 9731 9303 3551 7518 5467
## PGK1 4315 6758 7546 5684 7388 5833
## LAMP2 3316 4845 4790 4029 6396 4760
## CD40LG 7 36 14 9 16 23
## IRAK1 1554 2164 2472 2461 3183 2636
## COX2 87139 105917 111893 94767 92660 95699
## GSM6222618 GSM6222619 GSM6222620 GSM6222621 GSM6222622 GSM6222623
## TNFRSF9 11 1 5 53 211 36
## ENO1 15908 14256 13408 36557 38081 31920
## PIK3CD 569 526 343 676 722 551
## PGD 1971 1933 2349 4205 5508 5316
## MTHFR 1514 1263 1147 1683 2486 1612
## TNFRSF1B 798 515 343 903 1674 1081
## PINK1 2405 2059 2013 1484 1637 1935
## IFNLR1 540 763 534 512 645 400
## RUNX3 1034 1656 892 898 1487 839
## SH3BGRL3 3735 2908 3246 6438 7082 7030
## CD52 267 116 86 254 249 193
## IFI6 803 436 482 2094 25391 5873
## ZC3H12A 511 660 770 6971 10259 7861
## UTP11 747 660 744 1133 731 1106
## JUN 2396 1439 1904 1703 2383 1755
## KANK4 76 67 43 17 10 17
## EFCAB7 282 249 218 141 128 146
## IL23R 1 0 0 5 13 7
## ADGRL2 1012 852 979 1687 2086 1672
## GBP3 658 215 371 1090 2319 688
## GBP1 638 547 611 1682 6371 1507
## GBP5 99 44 35 233 1364 453
## TGFBR3 2421 2178 2520 1103 1091 1013
## VCAM1 308 154 141 309 564 347
## PTPN22 59 50 27 119 236 222
## CD160 31 29 13 23 8 28
## FCGR1A 9 7 4 18 47 18
## MCL1 7054 6474 8011 11277 12743 12550
## CTSK 9062 4104 3003 2902 3037 5099
## RORC 1233 1345 1030 72 72 127
## S100A9 2131 557 972 182198 240816 231147
## S100A12 5 1 1 782 903 1007
## S100A8 2026 936 1360 133147 160725 156027
## IL6R 740 947 538 867 817 970
## RIT1 892 714 744 1046 1189 1194
## BGLAP 44 46 17 10 16 12
## IFI16 3161 2606 2797 8556 12842 10647
## AIM2 11 6 10 28 133 32
## CRP 0 1 0 0 0 0
## FCGR2A 295 102 93 284 420 270
## HSPA6 351 168 232 324 436 347
## FCGR3A 104 76 23 204 683 355
## SELL 101 21 15 198 230 226
## GLUL 8873 7531 7712 8341 16416 14370
## PTGS2 96 145 164 151 220 316
## CRB1 1 4 2 1 3 1
## KDM5B 2642 3113 3249 3600 4704 4102
## IL10 1 0 0 6 23 12
## YOD1 3077 1770 2066 2406 2437 3691
## HHAT 269 288 323 189 235 278
## TRAF5 589 537 308 219 300 245
## NLRP3 86 21 10 61 119 93
## LINC01250 2 7 1 1 0 0
## RPS7 18462 12970 13765 9710 9744 10279
## RSAD2 107 53 82 259 3922 821
## FOSL2 9297 9488 10163 9199 12593 9754
## REL 1167 1795 1782 2207 3316 2852
## TGFA 1047 868 867 1609 2873 1507
## DYSF 412 276 332 507 521 380
## HK2 935 1719 1517 4481 6529 5212
## CD8A 87 89 48 225 516 274
## CD8B 42 47 21 60 123 53
## EIF5B 5215 5494 5358 7194 7776 6352
## IL1R1 2949 3086 2627 3107 4137 3685
## RGPD6 737 970 698 378 454 436
## IL1A 41 16 23 28 32 24
## IL1B 5 7 7 59 299 419
## IL37 1805 1253 1199 52 86 128
## IL36RN 3340 2103 2188 13994 20314 10440
## IL1F10 171 124 94 56 86 83
## IL1RN 2871 2122 2614 4107 8423 5998
## NMI 519 335 437 1010 1654 1070
## TNFAIP6 192 84 66 33 119 59
## IFIH1 519 492 468 1092 3987 1756
## SCN1A 5 6 2 4 1 2
## ABCB11 63 95 67 40 60 29
## RBM45 250 208 236 251 296 284
## FRZB 1173 1069 526 191 128 464
## TFPI 524 410 371 373 389 558
## STAT1 2167 2281 2445 7494 24127 10245
## NABP1 394 253 206 443 843 453
## SF3B1 10250 12270 10859 7779 9045 7830
## CASP10 575 447 312 828 1275 834
## CD28 59 29 18 173 177 104
## CTLA4 26 10 1 70 213 64
## ICOS 24 4 4 67 198 62
## NDUFS1 2301 3227 2635 3786 4051 3780
## CXCR2 112 108 141 1023 1409 1257
## IRS1 841 537 747 626 432 450
## CCL20 4 0 7 62 114 118
## ATG16L1 1032 1207 1138 861 935 721
## GPR35 89 82 40 18 29 27
## PDCD1 17 10 7 22 160 56
## PPARG 292 106 69 87 40 105
## RPL15 31603 25154 26335 18923 19019 20574
## EOMES 11 9 4 19 47 46
## CX3CR1 120 145 29 157 158 165
## ACKR2 63 25 32 858 638 504
## CCR2 112 78 49 277 174 258
## CCRL2 20 6 3 20 48 36
## TLR9 21 31 12 16 11 8
## ARHGEF3 970 990 982 1205 1358 1389
## ADAMTS9 492 243 307 232 111 168
## TMEM45A 16019 14977 19505 29543 32425 38671
## CD80 2 6 1 19 88 22
## CD86 127 96 74 166 373 154
## MIX23 276 253 246 283 285 292
## PLS1 185 161 208 254 388 343
## PTX3 47 37 34 35 23 41
## GOLIM4 1406 1107 1129 1378 1322 1303
## MYNN 721 670 830 563 575 541
## TNFSF10 1825 1816 1254 3669 4719 4220
## ADIPOQ 1808 80 134 263 1 234
## SPON2 2243 1357 847 388 250 429
## S100P 276 280 198 265 354 716
## WDR1 6450 6279 6295 10169 11290 8320
## CD38 11 8 5 55 109 100
## PPARGC1A 509 767 529 56 18 138
## TMPRSS11B 1 0 0 0 0 0
## CSN3 0 0 0 0 0 0
## ALB 19 8 1 0 1 0
## CXCL8 3 3 0 144 1414 1595
## CXCL2 7 1 4 21 148 45
## AREG 90 107 121 206 269 242
## CXCL10 24 3 13 98 1125 194
## CXCL13 2 3 3 27 581 109
## SPP1 18 7 0 48 32 49
## HERC6 318 310 306 963 9593 2782
## NFKB1 1995 2063 2232 3816 4439 3315
## SEC24B 1560 1735 1644 1627 1776 1436
## EGF 135 119 127 7 17 13
## IL2 1 1 0 0 0 3
## IL21 1 0 0 3 11 7
## IL21-AS1 0 0 0 1 7 2
## SLC7A11 117 96 146 1030 1129 814
## IL15 129 42 53 62 149 59
## EDNRA 573 406 458 388 564 424
## TLR2 242 249 116 361 499 349
## FGB 0 0 0 0 0 0
## DDX60 892 924 966 1320 6882 2249
## SPCS3 2358 2328 2541 3557 3972 3627
## TLR3 169 166 158 233 355 274
## OSMR 978 1244 922 2422 3002 2369
## GZMK 36 17 44 24 143 78
## GZMA 74 18 27 81 211 114
## ANKRD55 4 2 1 2 1 5
## CENPK 224 114 180 364 295 237
## CAST 11293 11287 12544 10793 12342 11659
## ERAP1 2470 3035 3594 2874 3310 3028
## ERAP2 1620 204 2074 1552 2668 175
## TNFAIP8 1311 902 1022 1363 1834 1517
## CSF2 1 0 0 0 13 2
## IL5 0 1 7 3 0 0
## IL13 0 11 0 3 7 0
## IL4 5 4 2 1 0 0
## CD14 859 364 215 550 799 761
## CSNK1A1 10265 10637 13275 20400 23887 18325
## PPARGC1B 410 597 356 1304 1206 1217
## TNIP1 3418 2647 2523 3893 6816 4737
## ATOX1 480 402 455 758 912 790
## FAXDC2 1585 1548 990 1375 1435 1730
## IL12B 2 1 0 20 58 7
## MIR146A 3 1 1 2 3 4
## PDLIM7 1447 1467 962 908 920 762
## SERPINB1 1169 940 834 1570 3210 2929
## SSR1 3286 3304 3542 5064 6006 4913
## CD83 208 94 153 300 1085 420
## SOX4 1943 2030 2288 1159 1053 1739
## CMAHP 1283 737 980 106 128 122
## HLA-A 16592 23274 16328 19518 48375 22045
## HLA-C 22859 13422 17591 18222 36790 21969
## HLA-B 25610 33946 23037 34570 96511 40482
## MICA 544 492 212 298 347 267
## LTA 12 29 15 22 25 29
## TNF 38 55 51 43 115 65
## HLA-DRB1 6611 2362 3420 4844 6808 3947
## HLA-DQB1 2918 996 1574 1162 2148 995
## PPARD 1998 1822 1877 4462 5402 4379
## CCND3 1151 1138 962 1519 1732 1199
## VEGFA 1359 1406 739 1473 2117 1779
## RUNX2 227 157 128 190 241 153
## IL17A 0 0 0 26 60 35
## IL17F 0 0 0 11 6 14
## PRDM1 1328 860 1022 3004 4694 3431
## ATG5 801 656 703 624 787 694
## TRAF3IP2 1609 1477 1426 1984 1844 1710
## NCOA7 825 685 747 1541 2073 1409
## SGK1 2734 1713 1864 3967 6857 6139
## IFNGR1 2663 2473 2422 4357 6051 4935
## TNFAIP3 1048 793 1488 753 1363 828
## SOD2 5082 4074 3821 11711 22326 16256
## LPAL2 16 12 13 5 10 4
## PLG 11 2 3 4 8 6
## CCR6 116 152 105 133 180 140
## RAC1 8947 8936 9599 11762 13918 13048
## ZNF316 3221 2891 2396 1589 1670 1505
## AHR 3018 3049 3391 4243 4641 4641
## IL6 3 1 2 5 17 8
## TOMM7 4715 3785 3312 2225 2661 2575
## CYCS 2613 2218 2509 6387 6386 5961
## AQP1 13444 9717 7269 5853 6331 8879
## NT5C3A 1077 819 903 1593 3142 2230
## EGFR 8295 12481 12131 9249 9211 8092
## CD36 2704 1162 1368 4332 6600 6466
## SAMD9 303 219 171 1716 6534 4375
## SERPINE1 236 171 126 158 293 85
## CUX1 2320 2433 2659 3365 3985 3265
## PSMC2 2709 2514 2922 3176 3625 2994
## NAMPT 1670 2039 2337 7172 12050 9107
## HYAL4 36 33 50 325 643 564
## LEP 340 27 55 164 11 16
## CALD1 7069 6708 4903 5905 4494 4918
## BRAF 1333 1371 1365 998 1162 1074
## EZH2 929 992 809 1411 1455 1345
## DNAJB6 4349 3835 4467 7432 9678 6928
## CSMD1 30 97 52 60 2 9
## CTSB 11764 10086 7699 23433 28087 32698
## EGR3 2186 2182 2560 2243 2161 2028
## TNFRSF10A 204 232 212 441 485 424
## BNIP3L 4705 4281 4472 3504 4316 5250
## DUSP4 494 442 384 571 916 336
## NRG1 296 383 793 335 420 293
## RPL7 31484 25727 28959 20643 17849 21949
## IL7 276 218 277 143 189 195
## GEM 383 321 278 246 253 216
## MYC 2431 1456 2551 3925 3294 3239
## GPT 628 634 616 325 281 257
## JAK2 822 1164 1113 920 1752 1130
## CD274 43 23 14 326 803 300
## IL33 1565 997 1162 2495 1214 2100
## LURAP1L-AS1 12 4 13 0 6 6
## LURAP1L 564 416 516 477 511 483
## TTC39B 1372 1543 1530 2754 3467 2792
## IFNA1 0 0 0 0 0 1
## TEK 393 317 244 219 226 296
## TRBV20OR9-2 0 0 0 1 1 0
## TOMM5 752 654 781 986 1036 928
## ANXA1 9438 6556 7729 9877 14608 8584
## ERP44 1308 1210 1370 1687 2032 1673
## ZNF483 270 379 187 99 122 145
## TNFSF15 34 21 12 106 98 57
## TLR4 441 271 222 274 291 295
## PTGS1 9857 9535 10465 7545 8066 6439
## HSPA5 6073 6564 7440 14120 18520 10919
## FNBP1 2754 2117 2267 2816 3592 2676
## CARD9 154 261 168 101 237 159
## TRAF2 465 410 312 582 772 540
## CLIC3 1425 1229 1331 2174 2673 2160
## IL2RA 29 12 3 108 212 78
## GATA3 8145 7475 8624 2987 3063 2623
## VIM 39274 23045 17986 18566 20919 20086
## CREM 338 268 234 223 311 254
## DKK1 24 29 23 8 2 4
## MBL2 0 0 0 0 0 0
## SAR1A 2975 2672 2559 2377 2642 2602
## PRF1 41 27 17 95 296 140
## ZMIZ1 3574 3886 4032 4757 5034 3824
## IFIT3 446 317 338 847 6690 1682
## TALDO1 2997 2344 2689 4723 5478 5098
## IGF2 379 630 261 318 490 371
## INS-IGF2 360 558 236 284 442 344
## IGF2-AS 5 2 3 0 0 0
## INS 0 0 0 0 0 0
## STIM1 2866 3431 3532 2774 3368 2664
## TRIM22 1137 962 678 1354 8404 2980
## PTH 0 0 0 0 0 0
## SAA1 20 21 27 198 531 731
## SLC1A2 77 132 89 40 54 31
## FOSL1 36 25 81 344 396 257
## CCND1 9082 10633 10557 5388 3991 4013
## JRKL 380 411 245 414 375 333
## MMP7 729 819 223 144 16 216
## MMP1 73 117 22 8 458 50
## MMP3 21 15 7 0 32 28
## CASP1 1357 915 1017 1491 2292 1479
## DLAT 887 974 1084 1578 1686 1308
## IL18 3556 2544 2735 1985 1996 1827
## CD3E 122 81 49 423 549 345
## MCAM 2337 2294 1643 2489 1990 2143
## WNK1 7027 10941 11076 9562 10100 7525
## TNFRSF1A 3438 3577 3607 4991 6088 5199
## GAPDH 33604 27190 23246 43955 58803 55806
## CD4 894 508 549 950 1261 998
## SLC2A3 241 208 227 301 605 450
## CLEC4D 0 0 0 2 7 2
## KLRB1 17 6 5 62 33 116
## CD69 41 18 21 62 85 62
## CLEC2B 2157 1630 1637 727 973 1768
## OLR1 7 3 0 6 39 12
## ABCD2 97 14 12 51 39 23
## LRRK2 529 537 701 444 427 552
## VDR 1594 2340 1945 2399 2536 2199
## TMBIM6 13796 13202 13006 17712 18014 18126
## PFDN5 5185 3872 3721 2333 2512 2603
## SP7 0 0 4 1 0 1
## MUCL1 6885 7520 996 331 410 894
## CD63 11121 6977 6339 4954 6328 5779
## SUOX 3244 3269 3208 2803 3371 2985
## RPS26 2164 613 1665 2057 2918 2222
## RPL41 11011 8564 8684 7134 7882 7874
## IL23A 11 21 28 22 67 29
## DDIT3 368 337 240 172 209 229
## IFNG 0 0 2 5 39 15
## IL22 0 0 0 7 3 0
## LYZ 2155 686 490 4797 7244 3966
## CAPS2 90 137 102 40 28 24
## SYT1 42 25 39 14 13 7
## BTG1 8856 8568 8802 6557 9679 8673
## HCAR3 491 243 418 795 1446 969
## GJB2 3249 4965 3678 64488 90408 92711
## GJB6 2653 4570 2622 18982 20610 28537
## TNFSF11 9 19 5 9 9 12
## TPT1 76653 62481 62318 56006 56410 63884
## LMO7 1971 2271 2514 2824 4868 4144
## TNFSF13B 221 100 44 155 273 203
## LAMP1 8786 9634 9737 11985 12960 12388
## PSME2 1460 1149 1109 2302 4048 2118
## GZMB 10 6 6 125 443 104
## RPL36AL 2938 2678 2647 3276 3624 2868
## PYGL 1611 1651 1513 5572 5797 4008
## HIF1A 2151 2592 2240 5814 6482 5620
## FOS 6468 2826 3537 1165 387 493
## JDP2 1301 989 1189 655 903 847
## NOXRED1 66 54 35 35 27 38
## SERPINA1 80 225 40 374 460 423
## CDC42BPB 4093 4401 4673 5422 6042 4846
## PLA2G4D 664 824 627 10122 18713 11060
## TRIM69 11 25 20 27 40 23
## SLC51B 26 29 16 11 16 14
## SMAD3 2270 1694 1586 1476 2027 1815
## CYP1A1 39 57 33 2 1 0
## AKAP13 4103 4249 4813 3589 3826 3337
## MEFV 21 3 8 44 77 121
## SOCS1 121 72 43 183 502 219
## ATXN2L 3425 3101 2721 2922 3250 2644
## CD19 10 3 5 11 7 4
## ITGAL 197 76 48 320 484 328
## ITGAM 369 120 111 326 423 291
## ITGAX 298 90 65 162 456 307
## DNAJA2 2394 2403 2575 2884 3407 3145
## SIAH1 1321 1380 1045 722 941 838
## ADCY7 1958 1396 1448 1812 2443 1871
## NOD2 674 736 785 2342 2652 2273
## CMTM2 2 8 0 0 1 3
## SF3B3 3484 3348 3766 5778 5726 5310
## PSMD7 2562 2628 2805 3500 3934 3139
## WWOX 188 300 291 356 556 349
## MAF 8936 10259 9266 6527 9465 6829
## SLC7A5 1163 1536 1745 10386 9383 6721
## CXCL16 958 923 862 1733 2644 1511
## XAF1 1231 729 490 1294 6812 2266
## CD68 1143 443 364 1134 2000 1500
## PER1 7287 2847 4421 969 1948 813
## NOS2 5 24 2 413 1995 314
## TRAF4 870 771 568 489 555 433
## CCL2 338 128 158 641 649 569
## CCL5 106 79 55 187 681 235
## CCL3 1 0 0 10 144 47
## CCR7 70 23 13 236 463 170
## STAT3 5826 6901 6898 16942 23378 16687
## SOST 19 33 6 80 107 19
## ITGA2B 39 15 13 25 13 11
## EFCAB13 194 162 97 48 40 60
## NPEPPS 3710 3857 3652 4404 4966 4116
## TBX21 9 13 3 10 46 31
## COL1A1 72107 58198 49649 30620 34531 67618
## MRPS23 756 669 606 739 763 673
## MIR21 1 1 1 1 3 1
## RPS6KB1 1093 1358 1441 1446 1648 1483
## ACE 707 624 506 445 608 438
## ERN1 930 1240 985 1015 1314 1117
## PRTN3 0 0 0 0 0 0
## PLIN5 187 252 101 530 1117 1024
## RETN 3 0 0 0 0 0
## CCL25 3 1 1 1 0 0
## ICAM1 537 412 323 516 1222 798
## TYK2 3067 3097 2477 1875 2234 1870
## SMARCA4 3232 4023 3971 5077 5024 4819
## ACP5 766 799 782 1831 2852 3038
## JUNB 3139 3286 2748 8267 9530 7686
## CYP4F22 1931 1886 2153 6544 6521 6486
## JAK3 223 145 82 331 442 270
## JUND 5193 4503 6325 3244 3525 3206
## CEBPA 6974 5750 10927 9597 11672 9198
## CEBPG 2460 2299 2871 2731 2974 2485
## ACP7 624 338 203 4366 6627 4074
## ZFP36 2878 1458 1574 3463 3898 2893
## TGFB1 1679 1214 1050 1853 2311 1953
## RPS19 25550 19038 22501 14779 17825 16667
## PSG2 0 0 4 4 7 0
## FUT2 239 217 79 514 1226 1450
## NKG7 53 16 22 38 156 62
## ZNF415 248 195 136 56 19 58
## LILRB2 129 37 11 112 322 147
## LILRA5 6 3 0 6 43 12
## KIR3DL1 0 0 1 0 3 0
## KIR3DL2 0 3 0 1 3 3
## SMOX 489 380 297 1487 1692 1927
## BMP2 633 916 1400 374 699 325
## GINS1 208 227 212 579 660 547
## PLCG1 3279 3506 2809 3339 3544 3006
## YWHAB 9896 8992 10191 12568 13840 12313
## PI3 173 82 97 75302 127198 125561
## MMP9 277 69 68 355 1717 452
## CD40 621 444 392 308 675 391
## PFDN4 405 270 326 298 268 314
## BMP7 1762 2164 2297 1990 1917 2006
## RPS21 9258 7206 8348 6510 6578 7711
## MX1 769 530 416 2436 16568 5935
## PFKL 5747 5097 4994 5395 5662 4705
## IL17RA 1190 1247 1069 1367 1298 1242
## UBE2L3 2231 1708 1952 2827 3108 2735
## TPST2 1041 681 984 889 1095 757
## XBP1 4132 3210 2197 4298 5594 4171
## UQCR10 876 676 736 986 1117 1122
## SEC14L2 485 724 600 363 597 898
## APOL6 796 583 514 2866 8482 3429
## APOL1 374 248 217 2230 9910 4964
## TYMP 1264 958 885 9497 30950 7858
## BEND2 0 0 1 1 0 0
## PHEX 21 27 11 47 45 53
## TIMP1 1875 766 659 903 799 1264
## FOXP3 89 85 70 173 367 166
## MSN 8062 7047 7402 13422 14911 11752
## PGK1 6936 6988 7092 13078 14341 15024
## LAMP2 5302 6312 6501 7227 8785 7985
## CD40LG 16 21 2 57 29 48
## IRAK1 3171 2893 2954 5946 6667 5148
## COX2 100671 104963 86881 113125 104720 158408
## GSM6222624 GSM6222625 GSM6222626 GSM6222627 GSM6222628 GSM6222629
## TNFRSF9 62 32 60 53 38 84
## ENO1 21148 28181 32621 19056 26046 40368
## PIK3CD 401 612 572 624 712 871
## PGD 3722 5226 4342 4022 2532 5567
## MTHFR 1407 2186 1893 2652 1813 1940
## TNFRSF1B 784 876 836 858 909 2048
## PINK1 1348 1624 2274 2002 1978 1591
## IFNLR1 343 473 498 456 493 469
## RUNX3 845 713 1068 782 853 1384
## SH3BGRL3 4702 5224 5789 4207 6394 6713
## CD52 145 133 179 159 323 405
## IFI6 12766 18464 14861 7094 1772 4668
## ZC3H12A 8663 9789 10150 6890 5095 12994
## UTP11 903 1112 1172 780 943 1334
## JUN 1189 1680 2112 1527 3632 1696
## KANK4 3 14 2 21 16 2
## EFCAB7 84 163 175 193 128 132
## IL23R 4 6 5 2 0 12
## ADGRL2 1154 1628 1383 1127 1126 2070
## GBP3 967 1279 1338 1232 647 700
## GBP1 1791 2320 2162 1762 2174 2599
## GBP5 226 388 278 423 175 1119
## TGFBR3 870 1162 1493 919 1214 1639
## VCAM1 99 187 109 252 414 778
## PTPN22 113 377 74 260 87 390
## CD160 33 19 11 25 11 7
## FCGR1A 15 19 8 20 6 140
## MCL1 10526 10548 10758 8733 12113 12414
## CTSK 2015 3475 2329 4668 3827 10410
## RORC 70 118 146 439 162 215
## S100A9 169007 262610 224978 159909 116870 257930
## S100A12 843 1212 852 687 385 1639
## S100A8 101757 177411 142933 113632 80752 162953
## IL6R 825 861 765 900 796 783
## RIT1 908 1235 1300 1324 1231 1197
## BGLAP 20 21 11 13 16 11
## IFI16 6352 10484 10107 7957 7179 12821
## AIM2 29 74 36 50 28 136
## CRP 0 2 0 0 3 0
## FCGR2A 199 202 146 193 275 827
## HSPA6 455 316 448 343 508 220
## FCGR3A 272 216 331 330 309 476
## SELL 72 131 81 101 263 503
## GLUL 17711 14372 21782 13591 10212 13880
## PTGS2 37 196 77 155 101 456
## CRB1 1 0 1 2 2 0
## KDM5B 3207 4208 5852 4413 3515 4406
## IL10 5 3 9 9 22 16
## YOD1 4886 2563 2385 1864 2151 1965
## HHAT 147 287 283 366 128 190
## TRAF5 246 330 141 432 282 295
## NLRP3 44 46 27 59 79 254
## LINC01250 1 2 0 0 1 0
## RPS7 7693 11415 11084 10304 8491 12496
## RSAD2 956 1427 951 484 169 1389
## FOSL2 8392 10079 12260 10248 10953 14224
## REL 2541 2991 3775 3326 2697 3375
## TGFA 1712 1612 1785 1250 1267 2495
## DYSF 180 522 345 273 447 751
## HK2 3692 5411 4299 4094 3107 8120
## CD8A 227 277 238 252 164 357
## CD8B 64 52 51 52 68 103
## EIF5B 4929 6277 8435 4692 5345 7329
## IL1R1 2014 3739 3708 3982 3219 8063
## RGPD6 468 621 610 596 563 483
## IL1A 4 54 36 21 9 114
## IL1B 35 374 97 198 36 1435
## IL37 318 103 124 219 179 20
## IL36RN 11716 10019 11804 6747 10384 11588
## IL1F10 76 61 70 65 62 56
## IL1RN 4839 4972 5097 3256 2921 6840
## NMI 794 1220 1348 804 1067 1100
## TNFAIP6 32 37 36 29 1853 531
## IFIH1 1569 2220 2189 1436 720 1372
## SCN1A 2 10 1 5 0 3
## ABCB11 55 99 76 75 24 13
## RBM45 195 289 248 240 254 266
## FRZB 180 577 500 563 269 595
## TFPI 278 590 391 441 617 801
## STAT1 10173 13884 13669 12573 6123 10686
## NABP1 365 750 396 661 842 744
## SF3B1 6976 10404 9046 10811 9064 10007
## CASP10 940 1010 870 947 750 695
## CD28 46 93 75 93 119 212
## CTLA4 115 79 73 81 32 101
## ICOS 58 54 52 46 84 131
## NDUFS1 2415 3769 3756 3164 2824 4139
## CXCR2 627 720 1289 766 189 814
## IRS1 460 447 414 323 599 1077
## CCL20 211 87 199 78 70 96
## ATG16L1 875 826 853 812 892 849
## GPR35 38 42 21 66 29 51
## PDCD1 59 58 53 41 36 70
## PPARG 39 103 55 73 145 303
## RPL15 15227 20333 22946 20683 18923 25572
## EOMES 36 33 15 35 15 41
## CX3CR1 50 125 139 195 95 195
## ACKR2 340 377 254 236 533 354
## CCR2 86 158 114 177 285 256
## CCRL2 21 42 17 30 28 57
## TLR9 9 17 14 13 23 41
## ARHGEF3 886 1410 1241 1492 1146 1538
## ADAMTS9 90 149 85 139 274 447
## TMEM45A 28079 30043 35104 30705 24915 25514
## CD80 25 13 17 20 17 15
## CD86 111 114 98 134 222 232
## MIX23 235 351 298 243 236 270
## PLS1 165 245 284 173 183 402
## PTX3 13 61 22 53 22 250
## GOLIM4 683 1022 1258 905 1090 1699
## MYNN 510 718 680 504 578 590
## TNFSF10 2252 4271 5166 4991 4805 5400
## ADIPOQ 36 147 74 171 239 327
## SPON2 354 646 700 646 549 500
## S100P 458 794 310 791 177 984
## WDR1 7819 8296 9335 7415 8435 13342
## CD38 27 62 34 65 33 84
## PPARGC1A 41 122 165 78 73 184
## TMPRSS11B 0 0 1 0 0 0
## CSN3 0 0 0 0 0 0
## ALB 3 3 1 6 0 0
## CXCL8 128 1988 372 1530 131 7255
## CXCL2 30 52 23 50 24 232
## AREG 128 246 150 123 108 310
## CXCL10 96 89 156 165 267 173
## CXCL13 172 59 130 30 13 979
## SPP1 17 21 24 29 4 32
## HERC6 4102 5997 4639 3073 1023 3067
## NFKB1 2582 3336 4292 2938 3488 3853
## SEC24B 1217 1416 1542 1524 1400 1649
## EGF 8 13 28 48 15 7
## IL2 0 3 0 0 0 0
## IL21 2 4 4 2 4 2
## IL21-AS1 4 5 2 1 1 1
## SLC7A11 636 843 605 489 553 1044
## IL15 73 75 70 113 115 155
## EDNRA 135 472 310 473 425 1223
## TLR2 342 526 502 689 410 732
## FGB 1 0 0 0 0 0
## DDX60 3455 3741 4402 2544 1546 1650
## SPCS3 2658 3172 3437 2511 3466 4394
## TLR3 139 298 286 235 226 186
## OSMR 1590 2327 2264 1914 2838 5108
## GZMK 49 37 30 35 8 163
## GZMA 42 93 69 84 61 269
## ANKRD55 7 5 2 3 4 3
## CENPK 109 232 141 220 178 192
## CAST 9717 9565 12847 10348 9650 12313
## ERAP1 1630 2959 3172 3262 3149 2884
## ERAP2 214 3083 204 349 1856 245
## TNFAIP8 889 1212 1426 1479 1336 1549
## CSF2 3 6 2 2 4 4
## IL5 1 0 1 1 3 0
## IL13 1 3 0 2 76 2
## IL4 2 1 1 2 1 0
## CD14 424 665 414 499 544 2260
## CSNK1A1 17042 21748 22142 16004 18356 19776
## PPARGC1B 1363 1374 1459 1290 988 1416
## TNIP1 4745 4426 5005 4785 3770 7212
## ATOX1 701 840 769 727 711 854
## FAXDC2 1393 1596 1714 2165 1252 1245
## IL12B 9 7 15 18 12 16
## MIR146A 4 6 4 4 2 4
## PDLIM7 982 1228 1026 1429 1006 1256
## SERPINB1 1649 4210 3067 1428 1301 8119
## SSR1 3399 4741 4760 3600 3912 6527
## CD83 377 281 347 301 289 422
## SOX4 539 2275 1557 2489 863 2724
## CMAHP 80 185 118 201 324 233
## HLA-A 24238 24702 34078 27347 29258 29451
## HLA-C 26548 22129 24915 33140 23389 23826
## HLA-B 51917 41146 58790 53369 43262 39922
## MICA 301 423 331 382 312 182
## LTA 6 19 17 26 23 18
## TNF 56 66 97 65 71 134
## HLA-DRB1 5267 3736 4019 4439 8778 9805
## HLA-DQB1 3816 1852 1733 646 4543 4141
## PPARD 5794 3997 4254 3487 4443 4432
## CCND3 1259 1427 1702 1037 1191 1619
## VEGFA 1518 1354 1622 1408 2716 1544
## RUNX2 97 231 145 260 131 355
## IL17A 15 50 50 40 9 21
## IL17F 11 28 59 9 2 17
## PRDM1 3026 2489 3429 2281 2433 3730
## ATG5 449 690 750 652 643 879
## TRAF3IP2 1580 1710 1862 1595 1688 2389
## NCOA7 1178 1793 1748 1466 1458 4636
## SGK1 4777 6391 8597 7474 6426 7350
## IFNGR1 2762 5322 4017 4252 3971 8366
## TNFAIP3 887 758 698 900 912 1463
## SOD2 9460 13191 20169 10320 10115 45179
## LPAL2 20 21 15 13 4 6
## PLG 3 5 6 3 4 2
## CCR6 93 72 116 105 115 207
## RAC1 9489 11469 14547 10689 9978 14953
## ZNF316 1999 2631 2309 2906 1913 1748
## AHR 1994 4391 3268 3786 4964 7181
## IL6 2 6 7 9 8 124
## TOMM7 1998 2854 2860 2510 2337 3019
## CYCS 4211 6151 5966 3143 3950 7547
## AQP1 3822 7049 6290 8181 6447 12500
## NT5C3A 2724 2549 2801 1886 1724 2281
## EGFR 6060 8511 10146 8771 8036 10468
## CD36 7465 8457 7192 8671 7111 3706
## SAMD9 2662 3748 2715 2167 722 4127
## SERPINE1 277 138 128 96 205 602
## CUX1 2584 3129 3828 2508 3580 3884
## PSMC2 2570 2823 3118 2278 2787 3042
## NAMPT 6369 10963 11271 5863 5128 28090
## HYAL4 426 523 448 495 144 613
## LEP 10 22 19 56 41 9
## CALD1 2912 4839 4094 5629 3759 7333
## BRAF 975 1238 1118 1300 990 1012
## EZH2 1032 1612 1052 1412 1037 1049
## DNAJB6 6748 6963 7901 5279 6395 8040
## CSMD1 20 11 2 46 7 16
## CTSB 14316 27087 25751 17348 20450 40774
## EGR3 3152 2280 2507 1879 2923 1535
## TNFRSF10A 473 514 418 272 524 514
## BNIP3L 4675 4056 4890 5460 4570 5504
## DUSP4 464 382 496 324 311 919
## NRG1 552 375 341 153 502 785
## RPL7 14455 18100 21974 21033 18181 24441
## IL7 97 191 233 215 196 230
## GEM 143 204 171 261 107 496
## MYC 3759 2949 4208 3192 3478 3498
## GPT 392 315 438 723 245 209
## JAK2 749 1071 1263 1207 1348 1068
## CD274 262 594 374 258 444 1441
## IL33 922 3634 2656 1919 2498 1721
## LURAP1L-AS1 8 5 1 11 4 1
## LURAP1L 284 491 530 472 1433 543
## TTC39B 2254 2945 2590 2859 2481 2680
## IFNA1 2 0 0 0 1 0
## TEK 149 383 199 285 255 469
## TRBV20OR9-2 1 0 2 0 0 1
## TOMM5 823 1058 1068 643 786 1037
## ANXA1 5004 7430 5459 3628 4903 26907
## ERP44 1422 1560 1832 1383 1431 1935
## ZNF483 93 135 100 182 128 114
## TNFSF15 21 47 42 66 49 107
## TLR4 196 200 148 274 330 572
## PTGS1 5618 6986 6869 14000 8891 5928
## HSPA5 13311 11727 14826 7977 11006 16867
## FNBP1 2204 2517 2609 3121 2551 3508
## CARD9 203 198 189 315 287 163
## TRAF2 471 562 653 669 818 485
## CLIC3 1811 1846 2325 1823 1908 1812
## IL2RA 77 57 69 61 71 227
## GATA3 2197 2665 3674 4991 4537 1868
## VIM 15657 14784 18380 17817 23131 23102
## CREM 177 327 277 274 276 459
## DKK1 8 2 7 3 3 10
## MBL2 0 0 1 0 0 0
## SAR1A 1967 2527 2629 2274 2563 3345
## PRF1 102 80 81 95 73 171
## ZMIZ1 3050 3693 4150 4388 3336 4794
## IFIT3 1630 3805 3064 2225 946 2608
## TALDO1 3908 4461 4785 3585 3395 5329
## IGF2 307 411 289 263 276 667
## INS-IGF2 276 370 255 237 242 597
## IGF2-AS 0 2 1 1 1 1
## INS 0 0 0 0 0 0
## STIM1 2052 3057 3222 2253 3208 3452
## TRIM22 4107 6612 4484 4814 1698 3904
## PTH 0 0 0 0 0 0
## SAA1 278 875 179 230 279 1530
## SLC1A2 23 23 19 27 41 22
## FOSL1 920 270 177 71 290 360
## CCND1 3958 4064 6421 3409 5253 2475
## JRKL 146 346 323 304 465 285
## MMP7 43 170 204 63 168 331
## MMP1 8 42 23 51 76 1091
## MMP3 0 40 28 26 96 353
## CASP1 1089 1944 1637 1617 1050 1587
## DLAT 988 1210 1404 913 1141 1905
## IL18 1738 2000 2225 2189 2510 1111
## CD3E 271 293 225 246 271 490
## MCAM 1376 2335 1780 2329 1628 5723
## WNK1 6524 7724 9866 8366 10115 8456
## TNFRSF1A 4236 5196 6213 4623 5265 6338
## GAPDH 31031 39542 47957 34264 36985 72106
## CD4 587 617 545 778 1249 1309
## SLC2A3 202 402 310 468 438 1307
## CLEC4D 0 5 4 5 2 46
## KLRB1 71 44 37 55 44 146
## CD69 53 59 38 82 49 219
## CLEC2B 943 1439 1358 2223 2113 1592
## OLR1 4 44 4 26 4 32
## ABCD2 15 22 14 38 43 47
## LRRK2 230 618 372 597 363 588
## VDR 1910 2439 2459 2445 2023 2957
## TMBIM6 11668 15244 17521 13581 13453 20381
## PFDN5 1834 2726 2696 2625 2448 2881
## SP7 0 0 0 4 0 0
## MUCL1 716 2284 1374 1790 676 1698
## CD63 3644 4849 5487 5383 7254 10286
## SUOX 2415 2794 3207 2833 2641 2332
## RPS26 975 2038 2213 1784 1494 2330
## RPL41 5909 7372 8164 6751 5916 7464
## IL23A 34 43 38 44 16 57
## DDIT3 216 263 236 303 201 248
## IFNG 6 10 18 20 18 13
## IL22 3 6 6 14 1 13
## LYZ 2620 2757 1103 2205 2202 6504
## CAPS2 31 42 46 49 25 49
## SYT1 13 4 13 25 8 21
## BTG1 4201 8771 11661 13360 6598 13145
## HCAR3 671 655 811 880 1084 1200
## GJB2 44112 73982 66903 40904 38785 115153
## GJB6 13818 22575 14539 14468 11576 27714
## TNFSF11 7 11 11 10 53 16
## TPT1 44416 59149 65525 58654 62357 77825
## LMO7 4358 3229 3598 4445 2322 2645
## TNFSF13B 96 147 72 160 151 328
## LAMP1 8373 11542 11048 10824 9722 12841
## PSME2 2225 2627 2644 1958 1885 2501
## GZMB 151 121 111 132 113 371
## RPL36AL 2518 3216 3512 2756 2620 4050
## PYGL 3178 2797 5540 3477 4020 3209
## HIF1A 3127 5430 4524 4073 3978 13427
## FOS 861 1289 468 455 1189 672
## JDP2 881 944 1110 758 943 697
## NOXRED1 31 68 46 59 39 22
## SERPINA1 192 452 291 348 178 1198
## CDC42BPB 4083 5456 5569 5262 4882 6007
## PLA2G4D 14648 14925 14105 8902 10599 9736
## TRIM69 35 41 16 57 19 18
## SLC51B 11 16 18 21 15 21
## SMAD3 1677 1750 2517 2302 1277 2037
## CYP1A1 15 3 0 2 2 84
## AKAP13 3153 3198 2938 3877 3775 3888
## MEFV 53 107 64 65 27 308
## SOCS1 181 272 223 158 321 510
## ATXN2L 2849 3572 3060 3345 2667 3239
## CD19 4 4 4 10 5 51
## ITGAL 259 325 186 332 342 549
## ITGAM 245 300 156 629 566 239
## ITGAX 195 421 101 472 303 615
## DNAJA2 2626 3149 3425 2599 2858 3130
## SIAH1 788 924 1011 1044 755 948
## ADCY7 1647 1749 1678 1763 2328 1727
## NOD2 2603 3167 2537 2691 2273 2016
## CMTM2 0 7 1 10 4 11
## SF3B3 4005 6099 6677 5465 5079 5567
## PSMD7 2722 3383 3750 2769 2908 3951
## WWOX 540 781 399 503 441 170
## MAF 4458 7962 10411 10125 11648 4906
## SLC7A5 7146 12051 8086 4718 3943 12272
## CXCL16 1480 1539 2130 1417 1405 2619
## XAF1 5774 7118 4650 4218 1471 1643
## CD68 858 883 711 863 1552 1926
## PER1 2704 2606 3151 771 1064 958
## NOS2 635 366 583 229 74 1417
## TRAF4 736 1010 769 573 561 843
## CCL2 216 511 384 407 945 1990
## CCL5 149 114 115 259 94 498
## CCL3 19 20 40 56 32 165
## CCR7 223 192 136 112 127 255
## STAT3 13885 16818 19566 14505 11752 28849
## SOST 22 19 41 8 25 93
## ITGA2B 11 39 8 44 27 29
## EFCAB13 56 95 55 127 74 41
## NPEPPS 3259 4843 4033 4662 4831 4797
## TBX21 10 25 9 22 9 34
## COL1A1 13991 30923 29376 37496 82884 198659
## MRPS23 559 726 834 570 681 837
## MIR21 3 4 2 3 3 2
## RPS6KB1 1286 1649 1801 1231 1351 1884
## ACE 264 587 354 385 693 1458
## ERN1 1091 1283 1222 1493 1078 1161
## PRTN3 0 0 0 0 0 0
## PLIN5 1035 1284 891 1128 460 1086
## RETN 0 3 0 4 0 5
## CCL25 0 1 0 0 0 0
## ICAM1 448 463 443 759 732 1852
## TYK2 2343 3059 2153 3509 2516 1982
## SMARCA4 4045 4920 5475 4726 4158 5482
## ACP5 1849 2936 2206 2577 2316 2053
## JUNB 4628 8623 8045 4928 7682 8732
## CYP4F22 5209 5174 5653 5193 4867 4836
## JAK3 256 358 172 384 241 727
## JUND 4793 3502 4106 2721 3340 4357
## CEBPA 10724 8897 11189 8623 6516 8431
## CEBPG 2409 2597 3025 2150 2450 2828
## ACP7 5277 3884 3677 3347 3589 3897
## ZFP36 2381 3229 2982 2342 3484 4225
## TGFB1 1558 1553 2364 1527 2283 2573
## RPS19 13509 17786 20781 18541 15143 19731
## PSG2 2 0 8 3 0 2
## FUT2 591 857 821 585 874 945
## NKG7 44 52 60 65 49 197
## ZNF415 41 80 104 95 27 63
## LILRB2 75 121 69 95 188 369
## LILRA5 5 10 1 9 18 82
## KIR3DL1 1 0 1 1 0 2
## KIR3DL2 4 1 0 7 0 3
## SMOX 1777 1078 1108 1099 1153 1262
## BMP2 387 317 442 255 266 1568
## GINS1 215 410 419 357 396 526
## PLCG1 3058 3673 3388 4479 3240 2667
## YWHAB 8853 10979 13841 9193 9604 14458
## PI3 57480 85380 74670 58951 19422 155270
## MMP9 345 332 257 199 472 988
## CD40 285 351 349 451 409 486
## PFDN4 179 316 292 230 243 366
## BMP7 1768 1557 1528 1248 845 1206
## RPS21 5295 7622 7936 6812 6441 8234
## MX1 7539 12256 9962 4929 2172 8424
## PFKL 4567 5670 6095 5808 5361 6156
## IL17RA 1124 1281 1383 1394 1321 1577
## UBE2L3 2123 2491 2937 1933 2164 2931
## TPST2 668 1130 1323 1299 1239 850
## XBP1 3909 4771 5418 3726 4015 4997
## UQCR10 784 1218 1178 819 871 1330
## SEC14L2 644 694 545 922 316 678
## APOL6 2975 4362 4150 3665 3012 3553
## APOL1 2710 3511 5440 1616 1307 4203
## TYMP 18863 25128 21380 15144 8594 21539
## BEND2 0 0 0 0 0 0
## PHEX 26 75 46 54 39 87
## TIMP1 469 828 743 764 1658 2292
## FOXP3 202 169 223 239 130 274
## MSN 7111 9427 12036 7893 10637 15873
## PGK1 8218 11293 13001 8146 10710 17294
## LAMP2 4530 7864 8182 7428 6901 8494
## CD40LG 14 40 19 33 85 34
## IRAK1 4677 5182 6329 3901 4687 6158
## COX2 82308 84352 125211 104007 70696 97337
## GSM6222630 GSM6222631 GSM6222632 GSM6222633 GSM6222634 GSM6222635
## TNFRSF9 3 17 3 12 5 6
## ENO1 11762 20261 13415 15318 9816 13704
## PIK3CD 346 427 329 349 459 394
## PGD 1635 4114 2327 3923 1888 3012
## MTHFR 1124 1403 1323 1071 1146 755
## TNFRSF1B 532 591 327 412 485 526
## PINK1 1958 2767 2026 3036 2320 3063
## IFNLR1 469 542 582 579 721 407
## RUNX3 743 1060 960 713 1063 921
## SH3BGRL3 3037 5727 2759 3583 2761 3010
## CD52 100 220 44 153 179 107
## IFI6 276 607 345 792 350 355
## ZC3H12A 514 1243 907 1347 787 931
## UTP11 465 634 665 782 698 542
## JUN 2019 2488 1824 1472 1807 2298
## KANK4 135 140 62 110 44 92
## EFCAB7 147 218 208 234 214 183
## IL23R 0 1 1 0 0 1
## ADGRL2 1236 1125 992 1160 1079 1564
## GBP3 363 560 269 567 625 396
## GBP1 460 614 593 520 504 425
## GBP5 38 41 24 45 67 50
## TGFBR3 2731 2344 3204 1590 2298 1940
## VCAM1 136 164 71 120 197 113
## PTPN22 41 49 19 49 49 32
## CD160 24 16 32 17 23 11
## FCGR1A 11 8 7 5 8 6
## MCL1 6721 7583 8874 7435 8461 6320
## CTSK 6899 5230 2953 2200 7605 3183
## RORC 755 1380 1331 1572 1476 1606
## S100A9 607 1818 264 849 505 1092
## S100A12 7 0 2 2 12 19
## S100A8 320 2075 196 832 458 876
## IL6R 643 484 710 613 624 482
## RIT1 501 739 561 818 809 586
## BGLAP 18 27 20 19 34 26
## IFI16 2194 2604 2655 2338 2664 1887
## AIM2 10 6 11 7 20 4
## CRP 0 0 0 0 0 0
## FCGR2A 145 179 73 123 181 175
## HSPA6 161 223 359 215 197 180
## FCGR3A 157 72 108 238 59 258
## SELL 44 17 13 18 84 153
## GLUL 7140 10298 6076 10523 7753 10579
## PTGS2 127 55 115 78 97 50
## CRB1 5 3 8 1 4 6
## KDM5B 2531 3338 3812 2940 3157 2529
## IL10 2 3 2 0 1 0
## YOD1 3216 1790 3385 2465 2799 1139
## HHAT 198 260 299 199 206 297
## TRAF5 347 173 442 341 408 175
## NLRP3 42 24 15 15 55 17
## LINC01250 0 1 1 3 0 2
## RPS7 8609 17449 13335 14019 15454 12876
## RSAD2 51 65 60 123 83 64
## FOSL2 7695 7499 12292 9687 9162 7975
## REL 1559 1247 2514 1384 1820 1160
## TGFA 872 1188 1157 1416 1155 946
## DYSF 370 344 251 166 389 612
## HK2 1358 2413 1178 1740 1062 2978
## CD8A 88 124 35 71 126 36
## CD8B 40 89 23 34 67 46
## EIF5B 4310 7403 5812 4891 5005 5073
## IL1R1 2356 2314 2312 2719 4012 2176
## RGPD6 648 476 1086 698 864 618
## IL1A 31 54 12 38 39 99
## IL1B 14 94 8 51 23 134
## IL37 2096 1349 1385 2013 2256 949
## IL36RN 2482 2121 3265 2345 3171 1025
## IL1F10 173 116 139 128 148 71
## IL1RN 2256 3367 3228 3507 2917 1878
## NMI 316 479 340 380 429 343
## TNFAIP6 112 88 74 24 78 44
## IFIH1 399 458 412 568 516 355
## SCN1A 2 0 2 1 16 4
## ABCB11 35 57 98 131 157 85
## RBM45 193 215 238 209 224 155
## FRZB 705 709 597 364 609 1058
## TFPI 481 358 317 369 801 403
## STAT1 2157 2476 2509 3266 2692 1836
## NABP1 151 193 120 274 413 191
## SF3B1 8011 7267 11311 8287 11273 6741
## CASP10 326 258 408 573 537 293
## CD28 18 34 2 20 40 17
## CTLA4 5 8 5 3 11 3
## ICOS 4 4 4 10 9 8
## NDUFS1 2975 3152 3418 3798 2924 2970
## CXCR2 91 155 96 80 111 339
## IRS1 1160 508 831 711 596 646
## CCL20 5 13 1 18 17 8
## ATG16L1 1115 1127 1384 1065 1271 803
## GPR35 40 26 109 76 85 72
## PDCD1 13 8 0 11 10 6
## PPARG 348 594 63 657 238 487
## RPL15 22132 34629 27508 27615 30241 25953
## EOMES 11 12 2 5 13 9
## CX3CR1 39 87 29 119 99 92
## ACKR2 40 36 28 34 32 30
## CCR2 36 99 13 44 120 53
## CCRL2 13 7 4 14 12 5
## TLR9 16 9 11 21 28 9
## ARHGEF3 913 787 958 733 921 736
## ADAMTS9 191 181 220 117 328 225
## TMEM45A 16518 28882 19647 27808 19737 23758
## CD80 2 3 6 2 3 2
## CD86 74 144 59 113 145 57
## MIX23 144 292 288 230 281 187
## PLS1 163 145 262 149 143 147
## PTX3 17 45 27 15 47 36
## GOLIM4 1289 968 966 705 1045 1198
## MYNN 571 572 683 585 735 508
## TNFSF10 1118 1744 825 1284 1556 1392
## ADIPOQ 2594 395 148 39 72 565
## SPON2 1355 1187 1678 802 1224 1050
## S100P 63 339 133 70 152 333
## WDR1 6350 7079 6788 6134 5225 5741
## CD38 16 7 3 8 6 6
## PPARGC1A 465 446 659 452 365 821
## TMPRSS11B 0 0 0 0 0 0
## CSN3 0 0 0 0 0 0
## ALB 1 2 12 5 4 6
## CXCL8 14 5 1 12 6 8
## CXCL2 13 9 11 3 8 5
## AREG 88 188 86 85 66 198
## CXCL10 24 6 12 6 21 16
## CXCL13 0 2 1 2 6 0
## SPP1 341 23 1 73 7 45
## HERC6 165 271 263 294 235 251
## NFKB1 1561 1826 2285 1849 2176 1623
## SEC24B 1447 1440 2051 1357 1608 1153
## EGF 97 53 157 75 171 84
## IL2 0 0 0 1 0 0
## IL21 0 0 0 0 0 0
## IL21-AS1 0 0 0 1 0 0
## SLC7A11 126 119 176 91 108 53
## IL15 32 82 41 61 97 33
## EDNRA 448 474 232 280 317 566
## TLR2 171 221 95 226 234 345
## FGB 0 0 0 0 0 1
## DDX60 987 734 1208 1480 1063 673
## SPCS3 2212 2796 3033 2544 2648 2359
## TLR3 133 139 199 130 189 88
## OSMR 1022 886 900 859 1171 793
## GZMK 21 30 9 11 37 4
## GZMA 20 47 2 10 29 17
## ANKRD55 1 1 0 10 11 5
## CENPK 65 167 142 114 138 107
## CAST 11506 9891 13434 12529 10677 9399
## ERAP1 3111 2484 3817 2338 3227 2045
## ERAP2 822 1111 134 213 1967 103
## TNFAIP8 805 1385 1010 1062 1233 669
## CSF2 0 0 0 1 1 0
## IL5 1 3 1 1 1 5
## IL13 0 2 4 1 3 1
## IL4 1 2 2 1 2 2
## CD14 503 467 203 258 310 317
## CSNK1A1 10375 11738 13508 10108 11252 8012
## PPARGC1B 437 585 440 663 400 539
## TNIP1 2462 3270 2474 2835 2896 2906
## ATOX1 372 631 435 489 526 358
## FAXDC2 2018 3368 1172 4443 2204 3959
## IL12B 0 3 0 3 1 2
## MIR146A 1 0 1 1 1 0
## PDLIM7 907 1396 945 932 684 928
## SERPINB1 640 883 807 629 792 748
## SSR1 3286 4057 3693 3463 3346 3332
## CD83 94 122 145 122 173 67
## SOX4 1730 2168 1976 977 1555 2116
## CMAHP 835 440 1177 615 1520 548
## HLA-A 12585 26067 20679 17078 19566 17953
## HLA-C 13209 21499 20701 19103 17458 12522
## HLA-B 20874 49978 37526 34645 31099 26231
## MICA 312 359 264 356 386 245
## LTA 15 13 24 8 13 14
## TNF 26 45 30 98 119 18
## HLA-DRB1 2219 4187 1992 4568 3879 2432
## HLA-DQB1 380 1055 506 3517 2089 1031
## PPARD 2097 1842 1881 2321 1897 1595
## CCND3 978 1288 1058 1378 1235 1169
## VEGFA 878 1519 726 1552 1661 1659
## RUNX2 120 110 132 82 169 93
## IL17A 0 0 0 0 0 0
## IL17F 0 0 0 0 0 0
## PRDM1 1265 1225 1528 1426 1335 1153
## ATG5 471 779 624 673 663 531
## TRAF3IP2 1294 1511 1689 1365 1278 1114
## NCOA7 654 777 702 618 603 476
## SGK1 1197 2360 1373 1624 1589 2975
## IFNGR1 2011 2874 2282 2271 2845 2279
## TNFAIP3 1202 641 2103 1106 1500 474
## SOD2 4798 5813 4309 4501 4793 4581
## LPAL2 5 4 3 2 12 2
## PLG 8 4 4 7 11 3
## CCR6 118 115 70 139 187 74
## RAC1 8307 12185 11561 8694 9341 7646
## ZNF316 1950 2128 2320 2142 3170 2262
## AHR 2645 3104 4163 2350 2837 2100
## IL6 3 3 0 4 2 9
## TOMM7 2060 5023 3257 3747 4022 4065
## CYCS 2044 3275 2255 2822 2336 2521
## AQP1 7066 7897 8550 6047 6638 8942
## NT5C3A 1064 1503 988 1200 1310 944
## EGFR 11049 6420 14845 9228 10970 8883
## CD36 3693 1766 602 1998 1285 4896
## SAMD9 223 158 143 351 263 264
## SERPINE1 305 60 125 167 189 71
## CUX1 2506 2157 2663 2297 2457 1681
## PSMC2 2155 3391 2949 2778 2584 2340
## NAMPT 1635 2633 2145 1929 1874 2088
## HYAL4 11 24 16 24 17 31
## LEP 1967 32 30 20 55 320
## CALD1 6591 7502 6272 4918 4759 5049
## BRAF 1325 910 1627 1279 1548 989
## EZH2 630 659 700 904 1102 408
## DNAJB6 4156 4317 4403 5132 4001 3678
## CSMD1 99 47 52 98 20 12
## CTSB 10424 12677 8841 9260 9037 15460
## EGR3 3547 3133 3275 3784 3600 2905
## TNFRSF10A 231 140 249 265 235 154
## BNIP3L 4457 7196 5066 7302 4951 6042
## DUSP4 703 930 397 1471 384 1017
## NRG1 609 275 727 564 390 443
## RPL7 21153 37494 28511 29166 27534 27016
## IL7 171 203 281 202 329 157
## GEM 400 258 180 234 332 322
## MYC 1497 1926 2170 1873 1204 1321
## GPT 853 2868 805 3630 1297 1829
## JAK2 952 728 1433 878 1233 684
## CD274 24 25 10 19 28 36
## IL33 706 1106 909 1073 2222 1268
## LURAP1L-AS1 0 5 10 6 1 0
## LURAP1L 330 529 330 334 470 418
## TTC39B 1933 3536 1668 3277 2278 2443
## IFNA1 0 0 0 0 0 0
## TEK 260 140 202 180 269 284
## TRBV20OR9-2 0 3 1 0 0 0
## TOMM5 489 1007 663 739 697 695
## ANXA1 9010 7140 7436 7502 6992 6793
## ERP44 1162 1463 1524 1394 1411 1209
## ZNF483 242 226 260 146 210 241
## TNFSF15 16 28 10 41 57 34
## TLR4 550 164 221 161 300 165
## PTGS1 7176 7913 12507 8130 10966 4866
## HSPA5 6792 10163 9492 10509 6445 8731
## FNBP1 2472 2006 2942 2185 2671 1831
## CARD9 59 216 90 219 242 136
## TRAF2 330 517 291 547 517 392
## CLIC3 1330 1781 1635 1338 2065 1191
## IL2RA 5 16 0 16 11 9
## GATA3 6087 7146 12256 6969 9929 4166
## VIM 28366 26066 20460 15420 21700 21404
## CREM 214 341 278 261 246 306
## DKK1 50 19 12 6 46 29
## MBL2 0 0 0 0 0 0
## SAR1A 2160 2878 2816 2647 2519 2187
## PRF1 36 54 7 37 42 62
## ZMIZ1 4281 2514 4675 3063 3340 3164
## IFIT3 268 375 274 240 331 282
## TALDO1 2578 5229 2807 5300 3001 3803
## IGF2 426 435 445 388 326 604
## INS-IGF2 392 402 417 369 298 558
## IGF2-AS 0 1 6 1 2 0
## INS 0 0 0 0 0 0
## STIM1 2961 2991 4302 2847 3312 2883
## TRIM22 475 675 507 864 992 488
## PTH 0 0 0 0 0 0
## SAA1 308 559 33 739 223 371
## SLC1A2 105 65 92 106 74 78
## FOSL1 141 52 86 58 24 20
## CCND1 11068 8542 14699 11294 14292 9331
## JRKL 260 364 211 340 433 208
## MMP7 249 248 369 213 744 303
## MMP1 3 13 12 0 4 7
## MMP3 4 6 26 2 8 0
## CASP1 705 1054 935 878 1099 1051
## DLAT 959 1017 1162 1044 873 797
## IL18 2476 3997 2858 2887 3451 2434
## CD3E 110 116 24 86 133 60
## MCAM 3093 3596 2302 2642 1320 2999
## WNK1 10162 5564 14185 9769 9489 7166
## TNFRSF1A 3265 4944 3873 3697 3518 2855
## GAPDH 15763 36535 25066 22449 19800 24867
## CD4 563 639 269 367 604 372
## SLC2A3 207 289 195 244 211 304
## CLEC4D 1 0 0 1 0 1
## KLRB1 16 26 4 9 15 14
## CD69 17 38 12 26 49 11
## CLEC2B 695 1502 1698 1199 1546 854
## OLR1 0 2 1 2 2 2
## ABCD2 107 9 9 7 20 14
## LRRK2 489 307 595 328 487 437
## VDR 1817 2566 1815 2411 1921 2259
## TMBIM6 14568 21004 13778 24202 14601 17724
## PFDN5 2802 5099 3473 4206 4267 3939
## SP7 1 3 0 1 0 0
## MUCL1 1647 4229 2999 1612 1525 5480
## CD63 6722 10956 5945 6053 6961 7490
## SUOX 3126 4079 3816 4069 3496 2972
## RPS26 1394 4768 1922 1021 1894 1850
## RPL41 6213 13884 8456 10131 9608 9655
## IL23A 6 19 20 9 6 12
## DDIT3 221 394 244 355 382 305
## IFNG 1 1 0 1 0 0
## IL22 0 0 0 0 1 0
## LYZ 769 1136 167 815 1574 473
## CAPS2 78 77 94 57 103 90
## SYT1 29 16 19 26 26 39
## BTG1 4988 11168 10120 6672 9743 7812
## HCAR3 351 480 359 460 628 137
## GJB2 2654 5518 1373 2192 1745 4174
## GJB6 1554 2651 1482 1560 1306 2085
## TNFSF11 7 43 7 20 16 19
## TPT1 43043 74332 58059 62731 65858 63349
## LMO7 2140 1701 2492 2404 2442 1394
## TNFSF13B 67 118 30 78 145 77
## LAMP1 9444 11222 10416 9920 9086 9081
## PSME2 835 1481 981 1340 1303 1030
## GZMB 13 9 3 3 9 16
## RPL36AL 2169 4541 2505 3484 2835 3489
## PYGL 1750 1867 1684 1484 1856 1250
## HIF1A 1875 1917 2816 1714 1853 2175
## FOS 3176 1249 2428 2538 3933 862
## JDP2 1106 1144 1616 845 1167 999
## NOXRED1 53 49 42 66 73 43
## SERPINA1 44 170 5 32 78 314
## CDC42BPB 4225 2424 4962 3354 4191 3067
## PLA2G4D 794 734 665 986 1156 1612
## TRIM69 9 17 12 24 28 9
## SLC51B 30 38 25 18 7 32
## SMAD3 1503 1310 1862 1757 1740 1310
## CYP1A1 4 115 44 183 95 3
## AKAP13 4934 3148 5814 3503 4221 3615
## MEFV 8 4 4 11 9 50
## SOCS1 90 156 56 107 137 101
## ATXN2L 2271 1981 2810 2632 3639 1791
## CD19 1 3 10 5 4 2
## ITGAL 103 95 27 93 169 95
## ITGAM 192 176 60 73 178 109
## ITGAX 109 109 26 99 201 176
## DNAJA2 1963 2607 2655 2384 2563 2042
## SIAH1 860 1304 1546 1194 1131 1063
## ADCY7 1266 1154 1466 1369 1663 816
## NOD2 628 737 917 921 992 464
## CMTM2 3 5 1 6 3 15
## SF3B3 3241 3830 3563 3944 3970 2925
## PSMD7 2080 3724 3312 3162 2446 2743
## WWOX 242 236 270 266 188 156
## MAF 6577 9835 10604 7514 9242 6259
## SLC7A5 1199 2121 2216 2380 980 1543
## CXCL16 852 1301 909 1102 915 956
## XAF1 411 351 308 1186 983 289
## CD68 739 477 224 355 472 305
## PER1 5214 5862 9903 4639 5911 7098
## NOS2 9 7 3 2 3 13
## TRAF4 474 714 787 827 664 614
## CCL2 104 362 105 188 120 162
## CCL5 126 112 17 50 75 46
## CCL3 9 2 0 2 8 0
## CCR7 28 25 9 33 48 33
## STAT3 6360 8240 6590 7011 6791 6613
## SOST 0 2 0 0 0 7
## ITGA2B 20 20 16 31 41 32
## EFCAB13 129 105 86 154 193 69
## NPEPPS 2839 2956 3552 3407 4068 2336
## TBX21 15 2 1 6 4 9
## COL1A1 19917 162326 9996 16923 25004 26661
## MRPS23 477 815 690 682 596 559
## MIR21 1 2 1 1 1 3
## RPS6KB1 1028 1026 1453 1107 1251 880
## ACE 560 366 398 280 514 461
## ERN1 952 809 1025 989 1274 856
## PRTN3 1 1 0 1 1 0
## PLIN5 615 3658 113 5683 963 3251
## RETN 3 2 0 0 4 0
## CCL25 2 0 0 0 0 2
## ICAM1 394 296 338 248 356 294
## TYK2 1808 1854 2541 2304 2990 1742
## SMARCA4 3267 3694 4356 3838 3604 3141
## ACP5 771 1538 629 1685 947 1305
## JUNB 3145 5139 3482 4058 4337 3645
## CYP4F22 2554 4202 2772 3613 2316 2465
## JAK3 107 99 74 98 165 127
## JUND 6747 7217 8089 4788 5302 6037
## CEBPA 6179 11674 13441 9268 7868 7214
## CEBPG 2023 2153 3046 2657 2998 2082
## ACP7 468 624 277 623 649 629
## ZFP36 1997 1909 2082 1801 2279 1506
## TGFB1 1302 1245 1233 1204 1073 973
## RPS19 13733 28522 21456 20894 20443 18581
## PSG2 1 2 3 4 7 0
## FUT2 74 169 108 123 167 87
## NKG7 22 29 4 15 36 29
## ZNF415 96 135 86 283 198 206
## LILRB2 39 36 6 23 47 54
## LILRA5 4 0 0 1 4 10
## KIR3DL1 0 0 0 1 1 0
## KIR3DL2 0 0 0 0 1 1
## SMOX 488 708 383 484 357 592
## BMP2 1225 509 2343 601 852 371
## GINS1 125 302 163 227 143 160
## PLCG1 2423 1966 2821 2847 3077 2263
## YWHAB 8129 12701 10124 9645 8576 8834
## PI3 105 273 89 116 165 114
## MMP9 348 99 38 121 139 73
## CD40 296 404 406 458 586 219
## PFDN4 215 402 273 278 317 256
## BMP7 2028 1934 2461 1728 1540 1161
## RPS21 4525 11263 8063 7668 8678 8817
## MX1 396 360 366 667 348 307
## PFKL 5251 7211 5398 6916 6265 5573
## IL17RA 1163 972 1191 1302 1077 1069
## UBE2L3 1623 2717 1741 2234 1906 1849
## TPST2 633 897 744 812 1041 838
## XBP1 2934 5311 3473 4887 3069 3900
## UQCR10 544 1418 655 1159 802 1044
## SEC14L2 353 763 369 465 208 828
## APOL6 683 724 408 609 609 562
## APOL1 242 247 207 192 302 235
## TYMP 778 2415 432 2030 1240 1491
## BEND2 0 0 0 0 0 1
## PHEX 12 18 16 15 22 17
## TIMP1 853 1309 574 552 1241 597
## FOXP3 51 75 68 109 78 66
## MSN 7847 6392 7892 5649 6914 5681
## PGK1 5948 9950 7683 7460 6137 6637
## LAMP2 5778 7769 7263 6398 6481 5891
## CD40LG 18 28 3 26 40 19
## IRAK1 2386 3442 2769 2889 2395 2483
## COX2 71940 116849 125419 119408 73020 114342
## GSM6222636 GSM6222637 GSM6222638
## TNFRSF9 7 15 7
## ENO1 14933 15446 13633
## PIK3CD 417 401 486
## PGD 2636 1943 3215
## MTHFR 1416 1154 1181
## TNFRSF1B 644 908 791
## PINK1 2680 2345 2103
## IFNLR1 550 611 509
## RUNX3 1444 1067 827
## SH3BGRL3 3050 3758 2907
## CD52 94 107 186
## IFI6 567 688 636
## ZC3H12A 921 908 711
## UTP11 742 706 598
## JUN 1440 2701 1523
## KANK4 101 181 162
## EFCAB7 206 199 170
## IL23R 0 0 0
## ADGRL2 1124 1164 1265
## GBP3 527 342 289
## GBP1 550 647 488
## GBP5 52 78 125
## TGFBR3 2830 4868 2779
## VCAM1 199 279 174
## PTPN22 27 45 52
## CD160 29 19 11
## FCGR1A 9 12 11
## MCL1 6318 8940 5943
## CTSK 6396 10133 5952
## RORC 1867 914 1528
## S100A9 239 1255 1086
## S100A12 3 5 32
## S100A8 154 1262 880
## IL6R 847 966 759
## RIT1 812 797 552
## BGLAP 32 26 15
## IFI16 2367 2368 2387
## AIM2 13 15 23
## CRP 1 0 0
## FCGR2A 128 173 238
## HSPA6 167 211 165
## FCGR3A 53 78 130
## SELL 60 51 162
## GLUL 8676 8071 7756
## PTGS2 85 187 77
## CRB1 7 5 9
## KDM5B 3123 2615 2454
## IL10 3 6 0
## YOD1 2157 2502 1810
## HHAT 212 202 188
## TRAF5 488 510 343
## NLRP3 21 51 46
## LINC01250 4 1 0
## RPS7 14108 12839 12844
## RSAD2 94 121 119
## FOSL2 8460 8834 7166
## REL 1410 1843 1498
## TGFA 887 1061 1019
## DYSF 395 527 368
## HK2 1588 2040 2287
## CD8A 92 81 78
## CD8B 30 43 35
## EIF5B 6452 5640 4745
## IL1R1 3243 2840 2944
## RGPD6 824 1158 641
## IL1A 55 46 81
## IL1B 44 4 68
## IL37 1865 1372 1476
## IL36RN 2469 1973 1423
## IL1F10 131 220 115
## IL1RN 1959 1895 1860
## NMI 392 356 368
## TNFAIP6 81 312 48
## IFIH1 511 224 433
## SCN1A 16 7 11
## ABCB11 113 44 92
## RBM45 244 182 181
## FRZB 1387 829 771
## TFPI 582 691 439
## STAT1 2536 2663 2753
## NABP1 251 238 229
## SF3B1 10577 11071 8462
## CASP10 422 542 332
## CD28 19 16 50
## CTLA4 1 13 26
## ICOS 2 5 22
## NDUFS1 3449 2415 3399
## CXCR2 144 69 218
## IRS1 725 1290 757
## CCL20 40 5 1
## ATG16L1 1259 1232 997
## GPR35 73 81 79
## PDCD1 14 8 12
## PPARG 439 532 515
## RPL15 29355 23562 26452
## EOMES 12 13 15
## CX3CR1 131 50 142
## ACKR2 37 62 42
## CCR2 76 52 106
## CCRL2 19 20 4
## TLR9 15 9 11
## ARHGEF3 885 872 757
## ADAMTS9 287 349 206
## TMEM45A 14857 16142 15786
## CD80 1 1 2
## CD86 85 96 100
## MIX23 250 176 185
## PLS1 142 136 179
## PTX3 109 51 37
## GOLIM4 1446 1733 979
## MYNN 745 691 568
## TNFSF10 1494 1335 1330
## ADIPOQ 823 4751 1189
## SPON2 1766 1572 1022
## S100P 216 183 177
## WDR1 7973 7484 5620
## CD38 9 39 6
## PPARGC1A 732 410 737
## TMPRSS11B 0 0 0
## CSN3 0 0 0
## ALB 13 15 8
## CXCL8 5 5 3
## CXCL2 7 30 4
## AREG 49 121 109
## CXCL10 11 44 17
## CXCL13 0 0 0
## SPP1 9 9 4
## HERC6 291 221 250
## NFKB1 2291 1823 1603
## SEC24B 1676 1903 1410
## EGF 133 86 70
## IL2 0 0 0
## IL21 0 0 0
## IL21-AS1 1 0 0
## SLC7A11 100 225 132
## IL15 76 94 62
## EDNRA 503 395 467
## TLR2 182 177 235
## FGB 0 0 1
## DDX60 1267 822 873
## SPCS3 2871 3047 2295
## TLR3 150 158 190
## OSMR 1062 1438 1053
## GZMK 18 19 32
## GZMA 36 33 33
## ANKRD55 3 1 3
## CENPK 120 79 102
## CAST 11088 12048 9743
## ERAP1 3353 3048 2619
## ERAP2 151 1244 181
## TNFAIP8 1062 774 927
## CSF2 0 0 0
## IL5 5 4 2
## IL13 2 6 0
## IL4 2 3 2
## CD14 351 1006 645
## CSNK1A1 10533 10310 8067
## PPARGC1B 604 360 499
## TNIP1 3022 3142 2759
## ATOX1 491 510 452
## FAXDC2 2471 1149 2954
## IL12B 1 1 1
## MIR146A 0 0 1
## PDLIM7 1700 1624 977
## SERPINB1 910 1104 840
## SSR1 3831 3454 3260
## CD83 161 129 106
## SOX4 1959 1929 1535
## CMAHP 960 681 486
## HLA-A 21808 23513 23468
## HLA-C 28420 22228 19459
## HLA-B 40077 32914 31482
## MICA 358 392 194
## LTA 14 26 16
## TNF 60 46 56
## HLA-DRB1 3520 5345 5198
## HLA-DQB1 489 2244 2291
## PPARD 1736 1895 1660
## CCND3 1327 1248 1069
## VEGFA 784 1059 1460
## RUNX2 169 165 135
## IL17A 0 0 0
## IL17F 0 0 0
## PRDM1 1179 1258 1278
## ATG5 764 679 641
## TRAF3IP2 1413 1305 1190
## NCOA7 729 915 751
## SGK1 2192 1851 1308
## IFNGR1 2561 2739 2508
## TNFAIP3 828 1115 731
## SOD2 5835 5471 5096
## LPAL2 8 11 11
## PLG 2 3 8
## CCR6 132 89 129
## RAC1 9716 9490 7582
## ZNF316 2699 2863 1976
## AHR 2701 3437 2367
## IL6 1 17 4
## TOMM7 3954 3822 4152
## CYCS 2703 2137 2335
## AQP1 12863 14865 8721
## NT5C3A 1002 982 1096
## EGFR 12151 10421 9519
## CD36 3694 7114 4080
## SAMD9 243 124 280
## SERPINE1 146 322 95
## CUX1 2446 2237 2065
## PSMC2 2836 2992 2346
## NAMPT 2084 2025 2241
## HYAL4 16 19 35
## LEP 488 2418 713
## CALD1 10898 6721 6064
## BRAF 1450 1518 1107
## EZH2 788 667 588
## DNAJB6 4386 3908 3403
## CSMD1 123 49 35
## CTSB 8857 12456 9401
## EGR3 2888 2578 2795
## TNFRSF10A 243 298 175
## BNIP3L 5031 5691 6257
## DUSP4 667 504 1019
## NRG1 264 739 229
## RPL7 29231 22518 25762
## IL7 230 161 138
## GEM 350 437 247
## MYC 1563 1684 1069
## GPT 1517 408 1370
## JAK2 1255 1124 1022
## CD274 31 41 25
## IL33 1175 1038 1294
## LURAP1L-AS1 4 9 9
## LURAP1L 460 513 456
## TTC39B 1827 1278 2748
## IFNA1 0 0 0
## TEK 435 386 331
## TRBV20OR9-2 1 0 0
## TOMM5 726 751 660
## ANXA1 7942 12059 6413
## ERP44 1379 1555 1142
## ZNF483 257 281 224
## TNFSF15 42 12 41
## TLR4 373 601 292
## PTGS1 9932 8850 5873
## HSPA5 8933 7674 7536
## FNBP1 2801 2451 2179
## CARD9 198 93 149
## TRAF2 470 311 386
## CLIC3 1613 1196 1005
## IL2RA 7 27 22
## GATA3 8736 6057 6211
## VIM 27502 42810 25233
## CREM 313 401 266
## DKK1 21 49 9
## MBL2 0 2 0
## SAR1A 3136 3417 2508
## PRF1 25 31 66
## ZMIZ1 3877 4280 3634
## IFIT3 344 552 513
## TALDO1 3168 2560 3663
## IGF2 565 841 585
## INS-IGF2 524 753 542
## IGF2-AS 4 3 8
## INS 0 0 0
## STIM1 3859 3526 2702
## TRIM22 680 833 800
## PTH 0 0 0
## SAA1 496 3901 452
## SLC1A2 183 73 114
## FOSL1 32 152 18
## CCND1 13015 8574 7425
## JRKL 323 204 275
## MMP7 236 333 307
## MMP1 3 14 7
## MMP3 3 23 0
## CASP1 896 777 834
## DLAT 1125 1075 1096
## IL18 2598 2123 2290
## CD3E 68 51 141
## MCAM 4591 6352 3534
## WNK1 9487 11192 9041
## TNFRSF1A 3872 3541 3353
## GAPDH 24329 25029 23798
## CD4 562 719 701
## SLC2A3 265 520 305
## CLEC4D 1 0 13
## KLRB1 15 8 16
## CD69 9 28 32
## CLEC2B 1254 1434 1138
## OLR1 0 0 1
## ABCD2 48 220 80
## LRRK2 507 645 451
## VDR 2289 2137 2333
## TMBIM6 18021 13187 18023
## PFDN5 4280 3696 3788
## SP7 5 3 1
## MUCL1 5705 1838 2708
## CD63 9843 12867 8238
## SUOX 3588 2847 2919
## RPS26 2035 1539 1813
## RPL41 9274 7100 7963
## IL23A 15 17 6
## DDIT3 400 351 300
## IFNG 0 1 1
## IL22 0 0 0
## LYZ 434 546 1218
## CAPS2 107 77 95
## SYT1 46 67 39
## BTG1 8378 6164 7999
## HCAR3 263 247 196
## GJB2 2143 3681 2697
## GJB6 1895 2993 1775
## TNFSF11 5 5 6
## TPT1 67536 61483 60565
## LMO7 2337 2045 1521
## TNFSF13B 78 123 147
## LAMP1 11831 10477 9299
## PSME2 1262 1151 1073
## GZMB 13 7 15
## RPL36AL 3449 2631 3146
## PYGL 1681 2133 1957
## HIF1A 2676 2678 2390
## FOS 1800 2712 1399
## JDP2 1296 1707 889
## NOXRED1 50 33 31
## SERPINA1 48 164 186
## CDC42BPB 4696 4646 3405
## PLA2G4D 575 489 637
## TRIM69 35 19 10
## SLC51B 43 28 24
## SMAD3 1825 1685 1640
## CYP1A1 15 52 146
## AKAP13 4062 6188 3454
## MEFV 14 17 44
## SOCS1 106 141 95
## ATXN2L 2821 2174 1821
## CD19 13 15 6
## ITGAL 100 59 195
## ITGAM 128 245 243
## ITGAX 109 77 218
## DNAJA2 2583 2605 2073
## SIAH1 1196 1237 1029
## ADCY7 1408 1368 1253
## NOD2 583 748 500
## CMTM2 9 4 16
## SF3B3 3699 2509 3359
## PSMD7 2797 2979 2582
## WWOX 294 338 157
## MAF 9153 7892 6820
## SLC7A5 2340 1019 2806
## CXCL16 1111 983 768
## XAF1 554 401 538
## CD68 413 1061 654
## PER1 2423 7782 2013
## NOS2 64 16 27
## TRAF4 874 636 628
## CCL2 287 152 131
## CCL5 61 85 155
## CCL3 2 5 0
## CCR7 22 53 31
## STAT3 7536 6665 7088
## SOST 9 0 5
## ITGA2B 30 20 41
## EFCAB13 120 102 88
## NPEPPS 3548 3320 2865
## TBX21 3 7 7
## COL1A1 57202 207331 36821
## MRPS23 774 745 585
## MIR21 0 0 1
## RPS6KB1 1411 1498 1104
## ACE 556 1199 693
## ERN1 1036 1134 973
## PRTN3 0 2 0
## PLIN5 1092 174 1834
## RETN 4 0 6
## CCL25 0 0 0
## ICAM1 757 796 451
## TYK2 2602 2660 1849
## SMARCA4 4217 3450 3216
## ACP5 704 749 907
## JUNB 3548 5264 3565
## CYP4F22 2721 2257 2376
## JAK3 117 112 158
## JUND 5000 10082 4473
## CEBPA 7421 10130 7565
## CEBPG 2514 2546 2076
## ACP7 359 344 495
## ZFP36 1480 2848 1488
## TGFB1 1258 1686 1184
## RPS19 21909 18619 17280
## PSG2 0 3 3
## FUT2 128 99 103
## NKG7 25 21 47
## ZNF415 229 66 212
## LILRB2 27 91 105
## LILRA5 7 5 4
## KIR3DL1 0 0 0
## KIR3DL2 0 0 0
## SMOX 583 388 583
## BMP2 698 936 621
## GINS1 212 126 152
## PLCG1 2947 3042 2365
## YWHAB 10719 9419 8505
## PI3 130 124 135
## MMP9 51 104 109
## CD40 383 344 294
## PFDN4 297 371 267
## BMP7 1831 1923 1359
## RPS21 8643 7188 7859
## MX1 376 682 613
## PFKL 6077 5365 5702
## IL17RA 1352 1159 1272
## UBE2L3 1900 1806 1816
## TPST2 722 966 663
## XBP1 4711 3080 3835
## UQCR10 960 742 982
## SEC14L2 340 599 367
## APOL6 771 1166 800
## APOL1 418 382 359
## TYMP 942 1324 1398
## BEND2 0 0 4
## PHEX 23 26 16
## TIMP1 1040 2214 997
## FOXP3 70 56 84
## MSN 7737 8447 6742
## PGK1 7086 7425 6810
## LAMP2 6949 5656 6274
## CD40LG 6 10 26
## IRAK1 3177 3623 2457
## COX2 118935 79507 86169
Selection of the samples used in the psoriatic arthritis (PsA) analysis and assignment of each sample to its group (control, lesion PsA, non lesion PsA)
# Sample selection ----
# One character per column of `expr_data_filtered`, in column order:
# "0" = control, "1" = lesion PsA, "2" = non lesion PsA, "X" = exclude.
gsms <- "000000000111111111222222222"
sml <- strsplit(gsms, split = "")[[1]]
# The coding string is positional: a wrong length or an unknown code would
# silently assign samples to the wrong group, so fail early instead.
stopifnot(
  length(sml) == ncol(expr_data_filtered),
  all(sml %in% c("0", "1", "2", "X"))
)
# Drop excluded samples (marked as "X")
sel <- which(sml != "X")
sml <- sml[sel]
expr_data <- expr_data_filtered[, sel, drop = FALSE]
# Group membership for samples. The code -> name mapping is spelled out so it
# does not depend on which codes happen to be present; empty groups are dropped.
groups <- make.names(c("control", "lesion PsA", "non lesion PsA"))
gs <- droplevels(factor(sml, levels = c("0", "1", "2"), labels = groups))
sample_info <- data.frame(Group = gs, row.names = colnames(expr_data))
# Build the DESeq2 data set (note: no low-count pre-filtering is applied here)
dsa <- DESeqDataSetFromMatrix(
  countData = expr_data,
  colData = sample_info,
  design = ~Group
)
# Likelihood-ratio test against the intercept-only model, used for an
# all-around gene ranking across the groups
dsb <- DESeq(dsa, test = "LRT", reduced = ~ 1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 3 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Results of the LRT fit (used for the top-genes table)
r <- results(dsb, alpha = 0.05, pAdjustMethod = "fdr")
plotDispEsts(dsb, main = "GSE205748 Dispersion Estimates")
# Histogram of adjusted p-values in 20 equal-width bins on [0, 1]
hist(
  r$padj,
  breaks = seq(0, 1, length.out = 21),
  col = "grey",
  border = "white",
  xlab = "",
  ylab = "",
  main = "GSE205748 Frequencies of padj-values"
)
# Pairwise contrasts, each given as c("Group", numerator, denominator):
# lesion vs control, non-lesion vs control, non-lesion vs lesion
cts <- lapply(
  list(c(2, 1), c(3, 1), c(3, 2)),
  function(pair) c("Group", groups[pair])
)
# Wald test to obtain contrast-specific results
dsc <- DESeq(dsa, test = "Wald", sfType = "poscount")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Results for the first contrast (lesion PsA vs control). Note that `r` is
# overwritten inside the loop below.
r <- results (dsc, contrast=cts[[1]], alpha=0.05, pAdjustMethod = "fdr")
# Venn diagram of the DEG sets (padj < 0.05 and |log2FC| >= 1) of each contrast
library(gplots)
all_res <- list()
for (ct in cts) {
# index of the last filled slot; the new set is appended at i + 1
i <- length(all_res)
# NOTE: this assigns to the global `r`; after the loop `r` holds the results
# of the LAST contrast in `cts`, and the code following this block reads it.
r <- results(dsc, contrast=ct, alpha=0.05, pAdjustMethod = "fdr")
# genes with a non-missing padj below 0.05 and an absolute log2FC of at least 1
all_res[[i + 1]] <- rownames(r)[!is.na(r$padj) & r$padj < 0.05 & abs(r$log2FoldChange) >= 1]
# name each set after its contrast, e.g. "Group_<numerator>_<denominator>"
names(all_res)[i + 1] <- paste(ct, collapse="_")
}
venn(all_res)
# Genes with complete results in `r` (rows containing any NA are dropped)
df <- na.omit(as.data.frame(r))
topGenes <- rownames(df)
heatmapData <- expr_data[topGenes, ]
# Plotting libraries
library(pheatmap)
library(RColorBrewer)
# Mean count of every gene across all samples, highest first
media_dos_genes <- rowMeans(heatmapData)
media_ordenada <- sort(media_dos_genes, decreasing = TRUE)
# The 20 genes with the highest mean, and their rows of the count matrix
top_20_genes_por_media <- names(head(media_ordenada, 20))
heatmapData_top20 <- heatmapData[top_20_genes_por_media, ]
# Column annotation: one row per sample, carrying its group
annotation_data <- data.frame(
  group = sample_info$Group,
  row.names = rownames(sample_info)
)
# Arrange samples by group (order() on a factor follows the level order)
# and apply the same permutation to the matrix columns and the annotation
ordem_grupo <- order(annotation_data$group)
heatmapData_top20 <- heatmapData_top20[, ordem_grupo]
annotation_data <- annotation_data[ordem_grupo, , drop = FALSE]
# ComplexHeatmap and circlize must be installed beforehand
library(ComplexHeatmap)
library(circlize)
# Count matrix of the 20 selected genes
mat <- heatmapData_top20
# Row-wise Z-score: scale() works on columns, hence the double transpose
mat_scaled <- t(scale(t(mat)))
# Group colour bar drawn above the heatmap
ha_col <- HeatmapAnnotation(
  Group = annotation_data$group,
  col = list(
    Group = c(
      "lesion.PsA" = "#d62728",
      "non.lesion.PsA" = "#1f77b4",
      "control" = "green"
    )
  )
)
# Reversed RdBu palette mapped onto Z-scores -2, 0 and 2
cores <- colorRamp2(
  breaks = c(-2, 0, 2),
  colors = rev(RColorBrewer::brewer.pal(n = 3, name = "RdBu"))
)
# Heatmap with the columns split by group and clustering enabled
Heatmap(
  mat_scaled,
  name = "Z-score",
  col = cores,
  top_annotation = ha_col,
  # one column slice per group
  column_split = annotation_data$group,
  # column clustering is switched on, including clustering of the slices
  cluster_columns = TRUE,
  cluster_column_slices = TRUE,
  cluster_rows = TRUE,
  show_row_names = TRUE,
  show_column_names = FALSE,
  column_title = "Heatmap of the 20 most highly expressed genes (average) from GSE205748",
  heatmap_legend_param = list(title = "Z-score")
)
# UMAP projection of the samples
# Drop genes whose counts sum to zero across all samples
expr_data_umap <- expr_data[rowSums(expr_data) > 0, ]
# Transpose so that every sample becomes one row of the input
u <- umap(
  t(expr_data_umap),
  n_neighbors = 15,
  random_state = 123
)
plot(
  u$layout,
  main = "GSE205748 UMAP",
  xlab = "",
  ylab = "",
  tcl = 0.1,
  pch = 19,
  col = "blue"
)
# Label each point with its sample name, placed above the point
text(u$layout, labels = colnames(expr_data_umap), cex = 0.7, pos = 3)
# Row names of `df` are used as gene symbols
gene_symbols <- rownames(df)
# Convert gene symbols to Entrez IDs
gene_entrez_ids <- bitr(gene_symbols, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
## 'select()' returned 1:1 mapping between keys and columns
# Expose the row names as a regular column so they can serve as join key
df$Symbol <- rownames(df)
# Left join: every gene of `df` is kept, unmapped symbols get ENTREZID = NA
dfd <- merge(df, gene_entrez_ids, by.x = "Symbol", by.y = "SYMBOL", all.x = TRUE)
# Keep one row per Entrez ID. Rows without an Entrez ID are all kept:
# duplicated() treats every NA after the first as a duplicate, which would
# otherwise silently discard all but one unmapped gene.
dfd <- dfd[is.na(dfd$ENTREZID) | !duplicated(dfd$ENTREZID), ]
# Wald test to obtain contrast-specific results
# NOTE(review): this is the same call (same `dsa`, test and sfType) as the one
# that produced `dsc` earlier in the file; the fit could presumably be reused
# instead of being recomputed -- confirm before changing.
dsd <- DESeq(dsa, test = "Wald", sfType = "poscount")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Contrast-specific results: groups[3] (numerator) vs groups[2] (denominator)
r <- results(dsd, contrast = c("Group", groups[3], groups[2]), alpha = 0.05, pAdjustMethod = "fdr")
# Keep only significant genes: padj < 0.05 and |log2FC| >= 1
sig_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) >= 1)
# Volcano plot (significant genes only)
# Two-colour palette: colour 1 = down, colour 2 = up. The previous palette is
# saved in `old.pal` and restored after the MD plot further below.
old.pal <- palette(c("#00BFFF", "#FF3030"))
par(mar = c(4, 4, 2, 1), cex.main = 1.5)
with(sig_genes, {
  plot(
    log2FoldChange, -log10(padj),
    main = paste(groups[3], "vs", groups[2]),
    xlab = "log2FC", ylab = "-log10(Padj)", pch = 20, cex = 0.5
  )
  # Labels must come from the same table as the coordinates; taking them from
  # the unfiltered `r` would attach the wrong gene names to the points.
  text(log2FoldChange, -log10(padj), labels = rownames(sig_genes), cex = 0.6, pos = 4)
})
# Overplot the points coloured by direction: sign -1 -> colour 1, +1 -> colour 2
with(
  sig_genes,
  points(log2FoldChange, -log10(padj), pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1)
)
legend("bottomleft", title = paste0("Padj<", 0.05), legend = c("down", "up"), pch = 20, col = 1:2)
# MD plot (mean vs. log2 fold change), significant genes only
par(mar = c(4, 4, 2, 1), cex.main = 1.5)
with(sig_genes, {
  # Colour index: sign -1 -> 1 (down), sign +1 -> 2 (up) in the active palette
  plot(
    log10(baseMean),
    log2FoldChange,
    main = paste(groups[3], "vs", groups[2]),
    xlab = "log10(mean of normalized counts)",
    ylab = "log2FoldChange",
    pch = 20,
    col = (sign(log2FoldChange) + 3) / 2,
    cex = 1
  )
  # Gene names to the right of every point
  text(
    log10(baseMean),
    log2FoldChange,
    labels = rownames(sig_genes),
    cex = 0.6,
    pos = 4
  )
})
legend(
  "bottomleft",
  title = paste0("Padj<", 0.05),
  legend = c("down", "up"),
  pch = 20,
  col = 1:2
)
# Reference line at no change
abline(h = 0)
# Restore the colour palette that was active before the volcano plot
palette(old.pal)
# Add the gene symbols to the points on the graph.
library(ggplot2)
library(ggrepel)
#' Volcano plot of differential-expression results
#'
#' Genes are classed as "Up" / "Down" when their adjusted p-value is below
#' `padj_cutoff` and their log2 fold change is above `lfc_cutoff` / below
#' `-lfc_cutoff`; every other gene is "NS". Only Up/Down genes are labelled.
#'
#' @param res Results table with `log2FoldChange` and `padj` columns and the
#'   gene identifiers as row names (a DESeq2 results object or a data frame).
#' @param title Plot title.
#' @param padj_cutoff Adjusted p-value threshold (default 0.05).
#' @param lfc_cutoff Absolute log2 fold-change threshold (default 1).
#' @return A ggplot object.
plotVolcano <- function(res, title = "Volcano Plot",
                        padj_cutoff = 0.05, lfc_cutoff = 1) {
  stopifnot(all(c("log2FoldChange", "padj") %in% colnames(res)))
  # Work on a plain data frame so ggplot does not depend on implicit coercion
  res <- as.data.frame(res)
  # which() drops NA comparisons, so genes with a missing padj or fold change
  # stay "NS" instead of relying on NA handling in logical indexing
  is_sig <- res$padj < padj_cutoff
  res$group <- "NS"
  res$group[which(is_sig & res$log2FoldChange > lfc_cutoff)] <- "Up"
  res$group[which(is_sig & res$log2FoldChange < -lfc_cutoff)] <- "Down"
  res$label <- ifelse(res$group != "NS", rownames(res), NA)
  ggplot(res, aes(x = log2FoldChange, y = -log10(padj), color = group)) +
    geom_point(alpha = 0.7, size = 2) +
    # Dashed guides at the significance and fold-change cut-offs
    geom_hline(yintercept = -log10(padj_cutoff), linetype = "dashed") +
    geom_vline(xintercept = c(-lfc_cutoff, lfc_cutoff), linetype = "dashed") +
    scale_color_manual(
      values = c("Up" = "firebrick", "Down" = "dodgerblue", "NS" = "grey80")
    ) +
    ggrepel::geom_text_repel(aes(label = label), size = 3, max.overlaps = Inf) +
    theme_minimal() +
    labs(
      title = title,
      x = "log2 Fold Change",
      y = "-log10 adjusted p-value",
      color = "Regulation"
    )
}
# Example of function usage
plotVolcano(r, paste(groups[3], "vs", groups[2]))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 265 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
# Upregulated genes: padj < 0.05 and log2FC >= 1
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange >= 1))
# Downregulated genes: padj < 0.05 and log2FC <= -1
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange <= -1))
# All DEGs, i.e. |log2FC| >= 1
de_genes <- c(up_genes, down_genes)
# Counts of the DEGs only; drop = FALSE keeps a genes x samples table even
# when a single gene is selected
expr_datasa <- as.data.frame(expr_data[rownames(expr_data) %in% de_genes, , drop = FALSE])
# Long format for ggplot2: one row per gene and sample
expr_datas <- expr_datasa
expr_datas$Gene <- rownames(expr_datas)
expr_datas <- tidyr::pivot_longer(
  expr_datas,
  cols = -Gene,
  names_to = "Sample",
  values_to = "Expression"
)
# Attach the phenotype columns of every sample
expr_datas <- merge(expr_datas, pheno_data, by.x = "Sample", by.y = "geo_accession")
# Up to 429 genes with the smallest padj. head() avoids the NA indices that
# `[1:429]` produces when `r` has fewer rows.
# NOTE(review): 429 is hard-coded, presumably tuned to this data set -- confirm.
expresse <- r[head(order(r$padj), 429), ]
# Join the statistics with the DEG counts by row name (adds a `Row.names` column)
expresse <- merge(as.data.frame(expresse), expr_datasa, by = 0, sort = FALSE)
library(dplyr)
library(tibble)
library(clusterProfiler)
library(ggplot2)
r_df <- as.data.frame(r)
# Move the gene names from the row names into a `Gene` column so that they
# survive the dplyr verbs below
r_df_com_genes <- tibble::rownames_to_column(r_df, var = "Gene")
# The 30 significant genes with the largest absolute log2 fold change.
# dplyr verbs are namespace-qualified because `filter` can be masked.
top30_fc <- r_df_com_genes %>%
  dplyr::filter(padj < 0.05, abs(log2FoldChange) > 1) %>%
  dplyr::arrange(dplyr::desc(abs(log2FoldChange))) %>%
  dplyr::slice_head(n = 30) %>%
  dplyr::mutate(
    Regulation = dplyr::case_when(
      log2FoldChange > 1 ~ "Upregulated",
      log2FoldChange < -1 ~ "Downregulated"
    )
  )
# Diverging bar chart of the selected genes, ordered by log2 fold change
ggplot(
  top30_fc,
  aes(x = reorder(Gene, log2FoldChange), y = log2FoldChange, fill = Regulation)
) +
  geom_col() +
  # Flip the axes so that the genes are listed along the vertical axis
  coord_flip() +
  scale_fill_manual(
    values = c("Upregulated" = "steelblue", "Downregulated" = "tomato")
  ) +
  # Fixed tick marks from -8 to 4; adjust if the fold changes leave this range
  scale_y_continuous(breaks = seq(-8, 4, by = 1)) +
  labs(
    title = "The 30 most prominent DEGs between non-lesion and lesion PsA (GSE205748)",
    x = "Gene",
    y = "log2 Fold Change",
    fill = "Regulation"
  ) +
  theme_bw() +
  theme(
    axis.text.y = element_text(color = "black", face = "bold", size = 9)
  )
# --- The 20 significant genes with the largest positive log2 fold change ---
up_20genes <- expresse %>%
  dplyr::filter(padj < 0.05, log2FoldChange > 1) %>%
  dplyr::arrange(dplyr::desc(log2FoldChange)) %>%
  dplyr::slice_head(n = 20) %>%
  dplyr::pull(Row.names)
# --- Long-format counts of those genes in the two PsA tissue types ---
expr_datas_filtrado_up <- expr_datas %>%
  dplyr::filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin uninvolved", "Psoriatic arthritis skin lesion"),
    Gene %in% up_20genes
  )
# --- Mean and standard deviation per gene and tissue type ---
expr_datasa_up <- expr_datas_filtrado_up %>%
  dplyr::group_by(Gene, `tissue type:ch1`) %>%
  dplyr::summarise(
    mean_expression = mean(Expression, na.rm = TRUE),
    sd_expression = sd(Expression, na.rm = TRUE),
    .groups = "drop"
  )
# --- Data for the chart: uninvolved skin only ---
dados_para_plotar <- expr_datasa_up %>%
  dplyr::filter(`tissue type:ch1` == "Psoriatic arthritis skin uninvolved")
# --- Bar chart: mean count per gene, genes ordered by that mean ---
ggplot(
  dados_para_plotar,
  aes(x = mean_expression, y = reorder(Gene, mean_expression))
) +
  geom_col(fill = "steelblue", color = "black", width = 0.7) +
  labs(
    title = "Average Expression of the Main Upregulated Genes in Psoriatic arthritis skin uninvolved",
    subtitle = "For non-lesion PsA vs lesion PsA (GSE205748)",
    x = "Average Expression in the Group 'skin uninvolved'",
    y = "Gene"
  ) +
  theme_minimal(base_size = 14) +
  # No horizontal grid lines behind the bars; slightly smaller gene labels
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 12)
  )
# --- The 20 significant genes with the most negative log2 fold change ---
down_20genes <- expresse %>%
  dplyr::filter(padj < 0.05, log2FoldChange < -1) %>%
  # ascending order: most negative fold change first
  dplyr::arrange(log2FoldChange) %>%
  dplyr::slice_head(n = 20) %>%
  dplyr::pull(Row.names)
# --- Long-format counts of those genes in the two PsA tissue types ---
expr_datas_filtrado_down <- expr_datas %>%
  dplyr::filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin uninvolved", "Psoriatic arthritis skin lesion"),
    Gene %in% down_20genes
  )
# --- Mean and standard deviation per gene and tissue type ---
expr_datasa_down <- expr_datas_filtrado_down %>%
  dplyr::group_by(Gene, `tissue type:ch1`) %>%
  dplyr::summarise(
    mean_expression = mean(Expression, na.rm = TRUE),
    sd_expression = sd(Expression, na.rm = TRUE),
    .groups = "drop"
  )
# --- Data for the chart: uninvolved skin only ---
dados_para_plotar <- expr_datasa_down %>%
  dplyr::filter(`tissue type:ch1` == "Psoriatic arthritis skin uninvolved")
# --- Bar chart: mean count per gene, genes ordered by decreasing mean ---
ggplot(
  dados_para_plotar,
  aes(x = mean_expression, y = reorder(Gene, -mean_expression))
) +
  geom_col(fill = "red", color = "black", width = 0.7) +
  labs(
    title = "Average Expression of the Main Downregulated Genes in Psoriatic arthritis skin uninvolved",
    subtitle = "For non-lesion PsA vs lesion PsA (GSE205748)",
    x = "Average Expression in the Group 'skin uninvolved'",
    y = "Gene"
  ) +
  theme_minimal(base_size = 14) +
  # No horizontal grid lines behind the bars; slightly smaller gene labels
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 12)
  )
# --- Build the count matrix of the DEGs for the PsA samples ---
# DEGs of the contrast currently held in `r`: padj < 0.05 and |log2FC| > 1
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange > 1))
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange < -1))
genes_interesse <- c(up_genes, down_genes)
# Keep only Psoriatic Arthritis samples (lesion and non-lesion) and the DEGs.
# dplyr/tidyr/tibble verbs are namespace-qualified: `select` and `filter` are
# also exported by other packages and can be masked depending on load order.
expr_filtrado <- expr_datas %>%
  dplyr::filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin uninvolved", "Psoriatic arthritis skin lesion"),
    Gene %in% genes_interesse
  )
# Back to wide format: genes in rows, samples in columns, missing cells as 0
matriz_contagem <- expr_filtrado %>%
  dplyr::select(Gene, Sample, Expression) %>%
  tidyr::pivot_wider(names_from = Sample, values_from = Expression, values_fill = 0) %>%
  tibble::column_to_rownames(var = "Gene")
# Restrict `sample_info` to the samples present in the final matrix so the
# annotation corresponds exactly to the heatmap data.
sample_info_filtrado <- subset(sample_info, rownames(sample_info) %in% colnames(matriz_contagem))
# Remove group levels that no longer have samples (e.g. "control"), so that
# annotation and column splitting only see the groups actually plotted.
sample_info_filtrado$Group <- droplevels(sample_info_filtrado$Group)
# Select the 20 DEGs with the highest mean count
media_dos_genes <- rowMeans(matriz_contagem)
media_ordenada <- sort(media_dos_genes, decreasing = TRUE)
top_20_genes_por_media <- names(head(media_ordenada, 20))
heatmapData_top20 <- as.matrix(matriz_contagem[top_20_genes_por_media, ])
# Create the annotation for the heatmap from the already filtered information.
annotation_data <- data.frame(
  Group = sample_info_filtrado$Group,
  row.names = rownames(sample_info_filtrado)
)
# Ensure that the order of the columns in the heatmap and annotation is the same.
heatmapData_top20 <- heatmapData_top20[, rownames(annotation_data)]
# --- Draw the DEG heatmap with ComplexHeatmap ---
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Row-wise Z-score: scale() works on columns, hence the double transpose
mat_scaled <- t(scale(t(heatmapData_top20)))
# Group colour bar drawn above the heatmap
ha_col <- HeatmapAnnotation(
  Group = annotation_data$Group,
  col = list(
    Group = c(
      "non.lesion.PsA" = "#1f77b4",
      "lesion.PsA" = "#d62728"
    )
  )
)
# Reversed RdBu palette mapped onto Z-scores -2, 0 and 2
cores <- colorRamp2(
  breaks = c(-2, 0, 2),
  colors = rev(RColorBrewer::brewer.pal(n = 3, name = "RdBu"))
)
# Heatmap with one column slice per group; rows and columns are clustered
Heatmap(
  mat_scaled,
  name = "Z-score",
  col = cores,
  top_annotation = ha_col,
  column_split = annotation_data$Group,
  cluster_columns = TRUE,
  cluster_rows = TRUE,
  show_row_names = TRUE,
  show_column_names = FALSE,
  column_title = "Heatmap of the 20 most highly expressed DEGs (non-lesion vs. lesion PsA GSE205748)",
  heatmap_legend_param = list(title = "Z-score")
)
# Differentially expressed genes: padj < 0.05 and |log2FC| > 1
de_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) > 1)
# Plain data frame reduced to the two statistics needed downstream.
# `select` is namespace-qualified because other attached packages export a
# function of the same name.
de_genes <- as.data.frame(de_genes)
de_genes <- de_genes %>% dplyr::select(log2FoldChange, padj)
# Row names of `de_genes` are used as gene symbols
gene_symbols <- rownames(de_genes)
# Convert gene symbols to Entrez IDs
gene_entrez_ids <- bitr(gene_symbols, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
## 'select()' returned 1:1 mapping between keys and columns
# Expose the row names as a regular column so they can serve as join key
de_genes$Symbol <- rownames(de_genes)
# Left join: every DEG is kept, unmapped symbols get ENTREZID = NA
de_genes <- merge(de_genes, gene_entrez_ids, by.x = "Symbol", by.y = "SYMBOL", all.x = TRUE)
# Keep one row per Entrez ID. Rows without an Entrez ID are all kept:
# duplicated() treats every NA after the first as a duplicate, which would
# otherwise silently discard all but one unmapped gene.
de_genes <- de_genes[is.na(de_genes$ENTREZID) | !duplicated(de_genes$ENTREZID), ]
# Keep only the genes that are present in the DEG count matrix
de_genes <- de_genes %>%
  dplyr::filter(Symbol %in% rownames(matriz_contagem))
# Install GOSemSim only when it is not already available, so that knitting the
# report does not attempt a re-installation on every run.
if (!requireNamespace("GOSemSim", quietly = TRUE)) {
  BiocManager::install("GOSemSim")
}
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://p3m.dev/cran/__linux__/jammy/latest
## Bioconductor version 3.20 (BiocManager 1.30.26), R 4.4.1 (2024-06-14)
## Installing package(s) 'GOSemSim'
## Warning in install.packages(...): installation of package 'GOSemSim' had
## non-zero exit status
## Installation paths not writeable, unable to update packages
## path: /usr/local/lib/R/library
## packages:
## boot, class, cluster, foreign, KernSmooth, lattice, MASS, Matrix, mgcv,
## nlme, nnet, rpart, spatial, survival
## path: /usr/local/lib/R/site-library
## packages:
## annotate, AnnotationDbi, ape, aplot, askpass, BH, Biobase, BiocFileCache,
## BiocGenerics, BiocManager, BiocParallel, BiocVersion, biomaRt, Biostrings,
## bit, bit64, bitops, Boruta, broom, bslib, C50, car, caret, checkmate, chk,
## classInt, cli, clock, clue, clusterProfiler, colorspace, commonmark,
## ComplexHeatmap, corrplot, cowplot, cpp11, credentials, crosstalk,
## cutpointr, dbplyr, dbscan, DelayedArray, DEoptimR, Deriv, DESeq2, devtools,
## DiceKriging, diffobj, doBy, docopt, DOSE, downloader, DT, dtplyr, edgeR,
## enrichplot, entropy, evaluate, fastmatch, fgsea, fontawesome, forcats, fs,
## future, future.apply, gargle, GDCRNATools, genefilter, generics,
## GenomeInfoDb, GenomeInfoDbData, GenomicDataCommons, GenomicRanges, gert,
## ggforce, ggfun, ggnewscale, ggplot2, ggplotify, ggpubr, ggraph, ggtree, gh,
## gld, glmnet, globals, glue, GO.db, googledrive, googlesheets4, GOSemSim,
## gower, GPArotation, gplots, graph, graphlayouts, gtable, hardhat, haven,
## HDO.db, here, hms, httpuv, httr2, IRanges, jpeg, jsonlite, KEGGgraph,
## KEGGREST, keras, KMsurv, knitr, labelled, later, lava, lavaan, lgr, limma,
## littler, lme4, lmom, locfit, lubridate, magrittr, markdown, MatchIt,
## MatrixGenerics, MatrixModels, matrixStats, maxstat, mice, mime, miniUI,
## mlbench, mlr3, mlr3learners, mlr3measures, mlr3misc, mlr3pipelines,
## modeltools, multcomp, mvtnorm, networkD3, nloptr, org.Hs.eg.db, party,
## partykit, patchwork, pathview, pbkrtest, pillar, pkgbuild, pkgdown,
## pkgload, plotly, pROC, processx, prodlim, progressr, PRROC, ps, psych,
## purrr, quantmod, quantreg, questionr, qvalue, R.cache, R.oo, R.utils, R6,
## ragg, ranger, Rcpp, RcppArmadillo, RcppTOML, RCurl, readxl, recipes,
## reshape, rgl, Rgraphviz, rlang, rmarkdown, robustbase, roxygen2, rprojroot,
## rsq, rstatix, rstudioapi, rversions, rvest, S4Arrays, S4Vectors, sass,
## scales, scatterpie, sessioninfo, shadowtext, shiny, sp, SparseArray,
## statmod, stringi, stringr, styler, SummarizedExperiment, survminer, sva,
## svglite, sys, systemfonts, TCGAbiolinks, TCGAbiolinksGUI.data, tensorflow,
## testthat, textshaping, tfruns, TH.data, tibble, timeDate, tinytex, treeio,
## tzdb, UCSC.utils, usethis, utf8, VIM, vroom, waldo, withr, xgboost, XML,
## xts, XVector, yulab.utils, zeallot, zip, zlibbioc, zoo
## Old packages: 'bbotk', 'Cubist', 'curl', 'data.table', 'ggsci', 'Hmisc',
## 'igraph', 'openssl', 'parallelly', 'promises', 'reformulas', 'reticulate',
## 'RSQLite', 'xfun', 'xml2'
library(GOSemSim)
# Install enrichplot only when it is not already available, so that knitting
# the report does not attempt a re-installation on every run.
if (!requireNamespace("enrichplot", quietly = TRUE)) {
  BiocManager::install("enrichplot")
}
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://p3m.dev/cran/__linux__/jammy/latest
## Bioconductor version 3.20 (BiocManager 1.30.26), R 4.4.1 (2024-06-14)
## Installing package(s) 'enrichplot'
## also installing the dependencies 'DOSE', 'GOSemSim'
## Warning in install.packages(...): installation of package 'GOSemSim' had
## non-zero exit status
## Warning in install.packages(...): installation of package 'DOSE' had non-zero
## exit status
## Warning in install.packages(...): installation of package 'enrichplot' had
## non-zero exit status
## Installation paths not writeable, unable to update packages
## path: /usr/local/lib/R/library
## packages:
## boot, class, cluster, foreign, KernSmooth, lattice, MASS, Matrix, mgcv,
## nlme, nnet, rpart, spatial, survival
## path: /usr/local/lib/R/site-library
## packages:
## annotate, AnnotationDbi, ape, aplot, askpass, BH, Biobase, BiocFileCache,
## BiocGenerics, BiocManager, BiocParallel, BiocVersion, biomaRt, Biostrings,
## bit, bit64, bitops, Boruta, broom, bslib, C50, car, caret, checkmate, chk,
## classInt, cli, clock, clue, clusterProfiler, colorspace, commonmark,
## ComplexHeatmap, corrplot, cowplot, cpp11, credentials, crosstalk,
## cutpointr, dbplyr, dbscan, DelayedArray, DEoptimR, Deriv, DESeq2, devtools,
## DiceKriging, diffobj, doBy, docopt, DOSE, downloader, DT, dtplyr, edgeR,
## enrichplot, entropy, evaluate, fastmatch, fgsea, fontawesome, forcats, fs,
## future, future.apply, gargle, GDCRNATools, genefilter, generics,
## GenomeInfoDb, GenomeInfoDbData, GenomicDataCommons, GenomicRanges, gert,
## ggforce, ggfun, ggnewscale, ggplot2, ggplotify, ggpubr, ggraph, ggtree, gh,
## gld, glmnet, globals, glue, GO.db, googledrive, googlesheets4, GOSemSim,
## gower, GPArotation, gplots, graph, graphlayouts, gtable, hardhat, haven,
## HDO.db, here, hms, httpuv, httr2, IRanges, jpeg, jsonlite, KEGGgraph,
## KEGGREST, keras, KMsurv, knitr, labelled, later, lava, lavaan, lgr, limma,
## littler, lme4, lmom, locfit, lubridate, magrittr, markdown, MatchIt,
## MatrixGenerics, MatrixModels, matrixStats, maxstat, mice, mime, miniUI,
## mlbench, mlr3, mlr3learners, mlr3measures, mlr3misc, mlr3pipelines,
## modeltools, multcomp, mvtnorm, networkD3, nloptr, org.Hs.eg.db, party,
## partykit, patchwork, pathview, pbkrtest, pillar, pkgbuild, pkgdown,
## pkgload, plotly, pROC, processx, prodlim, progressr, PRROC, ps, psych,
## purrr, quantmod, quantreg, questionr, qvalue, R.cache, R.oo, R.utils, R6,
## ragg, ranger, Rcpp, RcppArmadillo, RcppTOML, RCurl, readxl, recipes,
## reshape, rgl, Rgraphviz, rlang, rmarkdown, robustbase, roxygen2, rprojroot,
## rsq, rstatix, rstudioapi, rversions, rvest, S4Arrays, S4Vectors, sass,
## scales, scatterpie, sessioninfo, shadowtext, shiny, sp, SparseArray,
## statmod, stringi, stringr, styler, SummarizedExperiment, survminer, sva,
## svglite, sys, systemfonts, TCGAbiolinks, TCGAbiolinksGUI.data, tensorflow,
## testthat, textshaping, tfruns, TH.data, tibble, timeDate, tinytex, treeio,
## tzdb, UCSC.utils, usethis, utf8, VIM, vroom, waldo, withr, xgboost, XML,
## xts, XVector, yulab.utils, zeallot, zip, zlibbioc, zoo
## Old packages: 'bbotk', 'Cubist', 'curl', 'data.table', 'ggsci', 'Hmisc',
## 'igraph', 'openssl', 'parallelly', 'promises', 'reformulas', 'reticulate',
## 'RSQLite', 'xfun', 'xml2'
library(enrichplot)
# Install STRINGdb only when it is not already available, so that knitting the
# report does not attempt a re-installation on every run.
if (!requireNamespace("STRINGdb", quietly = TRUE)) {
  BiocManager::install("STRINGdb")
}
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://p3m.dev/cran/__linux__/jammy/latest
## Bioconductor version 3.20 (BiocManager 1.30.26), R 4.4.1 (2024-06-14)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'STRINGdb'
## Installation paths not writeable, unable to update packages
## path: /usr/local/lib/R/library
## packages:
## boot, class, cluster, foreign, KernSmooth, lattice, MASS, Matrix, mgcv,
## nlme, nnet, rpart, spatial, survival
## path: /usr/local/lib/R/site-library
## packages:
## annotate, AnnotationDbi, ape, aplot, askpass, BH, Biobase, BiocFileCache,
## BiocGenerics, BiocManager, BiocParallel, BiocVersion, biomaRt, Biostrings,
## bit, bit64, bitops, Boruta, broom, bslib, C50, car, caret, checkmate, chk,
## classInt, cli, clock, clue, clusterProfiler, colorspace, commonmark,
## ComplexHeatmap, corrplot, cowplot, cpp11, credentials, crosstalk,
## cutpointr, dbplyr, dbscan, DelayedArray, DEoptimR, Deriv, DESeq2, devtools,
## DiceKriging, diffobj, doBy, docopt, DOSE, downloader, DT, dtplyr, edgeR,
## enrichplot, entropy, evaluate, fastmatch, fgsea, fontawesome, forcats, fs,
## future, future.apply, gargle, GDCRNATools, genefilter, generics,
## GenomeInfoDb, GenomeInfoDbData, GenomicDataCommons, GenomicRanges, gert,
## ggforce, ggfun, ggnewscale, ggplot2, ggplotify, ggpubr, ggraph, ggtree, gh,
## gld, glmnet, globals, glue, GO.db, googledrive, googlesheets4, GOSemSim,
## gower, GPArotation, gplots, graph, graphlayouts, gtable, hardhat, haven,
## HDO.db, here, hms, httpuv, httr2, IRanges, jpeg, jsonlite, KEGGgraph,
## KEGGREST, keras, KMsurv, knitr, labelled, later, lava, lavaan, lgr, limma,
## littler, lme4, lmom, locfit, lubridate, magrittr, markdown, MatchIt,
## MatrixGenerics, MatrixModels, matrixStats, maxstat, mice, mime, miniUI,
## mlbench, mlr3, mlr3learners, mlr3measures, mlr3misc, mlr3pipelines,
## modeltools, multcomp, mvtnorm, networkD3, nloptr, org.Hs.eg.db, party,
## partykit, patchwork, pathview, pbkrtest, pillar, pkgbuild, pkgdown,
## pkgload, plotly, pROC, processx, prodlim, progressr, PRROC, ps, psych,
## purrr, quantmod, quantreg, questionr, qvalue, R.cache, R.oo, R.utils, R6,
## ragg, ranger, Rcpp, RcppArmadillo, RcppTOML, RCurl, readxl, recipes,
## reshape, rgl, Rgraphviz, rlang, rmarkdown, robustbase, roxygen2, rprojroot,
## rsq, rstatix, rstudioapi, rversions, rvest, S4Arrays, S4Vectors, sass,
## scales, scatterpie, sessioninfo, shadowtext, shiny, sp, SparseArray,
## statmod, stringi, stringr, styler, SummarizedExperiment, survminer, sva,
## svglite, sys, systemfonts, TCGAbiolinks, TCGAbiolinksGUI.data, tensorflow,
## testthat, textshaping, tfruns, TH.data, tibble, timeDate, tinytex, treeio,
## tzdb, UCSC.utils, usethis, utf8, VIM, vroom, waldo, withr, xgboost, XML,
## xts, XVector, yulab.utils, zeallot, zip, zlibbioc, zoo
## Old packages: 'bbotk', 'Cubist', 'curl', 'data.table', 'ggsci', 'Hmisc',
## 'igraph', 'openssl', 'parallelly', 'promises', 'reformulas', 'reticulate',
## 'RSQLite', 'xfun', 'xml2'
library(STRINGdb)
# Alternative workflow: protein-protein interaction analysis with STRINGdb.
# Raise the download timeout to 10 minutes BEFORE any STRING request is made.
# Previously this was set only after the STRINGdb object had been created, so
# the constructor ran under R's default timeout.
options(timeout = 600)
# Initialise the STRINGdb object: STRING version 12, species 9606 (human),
# score threshold 400. `input_directory = ""` leaves the download location to
# STRINGdb.
string_db <- STRINGdb$new(version = "12", species = 9606, score_threshold = 400, input_directory = "")
# Map the identifiers in the `Symbol` column of `de_genes` to STRING IDs;
# rows that cannot be mapped are dropped.
mapped_genes <- string_db$map(de_genes, "Symbol", removeUnmappedRows = TRUE)
## Warning: we couldn't map to STRING 2% of your identifiers
# Fetch the STRING interactions among the successfully mapped proteins
interactions <- string_db$get_interactions(
  mapped_genes[["STRING_id"]]
)
# Visualise the network using igraph or other visualisation tools
library(igraph)
##
## Attaching package: 'igraph'
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:rtracklayer':
##
## blocks, path
## The following object is masked from 'package:BiocIO':
##
## path
## The following object is masked from 'package:Biostrings':
##
## union
## The following object is masked from 'package:XVector':
##
## path
## The following object is masked from 'package:circlize':
##
## degree
## The following objects are masked from 'package:topGO':
##
## algorithm, graph
## The following objects are masked from 'package:graph':
##
## degree, edges, intersection, union
## The following object is masked from 'package:clusterProfiler':
##
## simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following object is masked from 'package:GenomicRanges':
##
## union
## The following object is masked from 'package:IRanges':
##
## union
## The following object is masked from 'package:S4Vectors':
##
## union
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, path, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Build an undirected igraph network from the STRING edge list and draw it
g <- graph_from_data_frame(d = interactions, directed = FALSE)
plot(g)
# Community detection with the Louvain algorithm
clusters <- cluster_louvain(graph = g)
# Redraw the network with each vertex coloured by its community
plot(g, vertex.color = clusters$membership)
# Perform GO enrichment analysis
# Biological Process
# Ensure you have the packages installed
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(dplyr) # For data manipulation
# GO over-representation analysis restricted to the Biological Process
# ontology, keyed on the gene symbols that were mapped to STRING.
enrich_result <- enrichGO(
  gene          = mapped_genes$Symbol,
  OrgDb         = org.Hs.eg.db,
  keyType       = "SYMBOL",
  ont           = "BP",  # ontology: one of "BP", "MF" or "CC"
  pAdjustMethod = "BH",  # Benjamini-Hochberg correction (FDR)
  pvalueCutoff  = 0.05,
  qvalueCutoff  = 0.2
)
# Bubble ("lollipop") chart of the 15 GO terms with the smallest adjusted
# p-values. When the enrichment is NULL or empty only a notice is printed.
if (!is.null(enrich_result) && nrow(as.data.frame(enrich_result)) > 0) {
  # Rank the terms by FDR and keep the 15 most significant
  plot_data <- head(arrange(as.data.frame(enrich_result), p.adjust), 15)
  # Numeric row position per term, reversed so the most significant term is
  # drawn at the top of the y axis
  plot_data$y_pos <- rev(seq_len(nrow(plot_data)))
  ggplot(plot_data, aes(x = Count, y = y_pos)) +
    # Horizontal "thread" from x = 0 to the term's gene count
    geom_segment(
      aes(x = 0, xend = Count, y = y_pos, yend = y_pos),
      color = "gray",
      linewidth = 0.5
    ) +
    # Bubble: size encodes gene count, colour encodes FDR
    geom_point(aes(size = Count, color = p.adjust), alpha = 0.8) +
    scale_color_gradientn(
      name = "FDR",
      colors = c("darkblue", "steelblue", "lightblue", "lightgreen", "yellow", "orange", "darkred"),
      trans = "log10",
      breaks = scales::trans_breaks("log10", function(x) 10^x),
      labels = scales::trans_format("log10", scales::math_format(10^.x))
    ) +
    scale_size_area(name = "Gene Counting", max_size = 15) +
    # Put the GO descriptions back on the numeric y positions
    scale_y_continuous(
      breaks = plot_data$y_pos,
      labels = plot_data$Description
    ) +
    labs(
      title = "GO Enrichment Analysis (Biological Processes)",
      x = "Genes Counting",
      y = "GO Terms (Biological Processes)"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
      axis.title = element_text(size = 14, face = "bold"),
      axis.text.y = element_text(size = 12, face = "bold"),
      legend.position = "right",
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      panel.grid.major.y = element_blank(),  # the segments replace the grid
      panel.grid.minor = element_blank(),
      panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)
    )
} else {
  print("No significantly enriched GO terms were found with the provided criteria.")
}
# enrichplot bar chart of the top 10 GO terms
barplot(enrich_result, showCategory = 10, title = "GO Enrichment Analysis")
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the enrichplot package.
## Please report the issue at
## <https://github.com/GuangchuangYu/enrichplot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Gene-concept network for the top 10 enriched GO terms
cnetplot(
  x = enrich_result,
  showCategory = 10
)
## Warning: `aes_()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`
## ℹ The deprecated feature was likely used in the enrichplot package.
## Please report the issue at
## <https://github.com/GuangchuangYu/enrichplot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Gene-by-term heat plot for the top 10 enriched GO terms
heatplot(
  x = enrich_result,
  showCategory = 10
)
# KEGG pathway over-representation analysis on the Entrez IDs of the mapped
# genes (organism code "hsa" = human)
ekegg <- enrichKEGG(
  gene          = mapped_genes$ENTREZID,
  organism      = "hsa",
  pAdjustMethod = "BH",
  qvalueCutoff  = 0.05
)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
# Bubble ("lollipop") chart of the 15 KEGG pathways with the smallest adjusted
# p-values, in the same visual style as the GO chart. When the enrichment is
# NULL or empty only a notice is printed.
if (!is.null(ekegg) && nrow(as.data.frame(ekegg)) > 0) {
  # Rank the pathways by FDR and keep the 15 most significant
  plot_data_kegg <- head(arrange(as.data.frame(ekegg), p.adjust), 15)
  # Numeric row position per pathway, reversed so the most significant pathway
  # sits at the top of the y axis
  plot_data_kegg$y_pos <- rev(seq_len(nrow(plot_data_kegg)))
  ggplot(plot_data_kegg, aes(x = Count, y = y_pos)) +
    # Connecting line from the y axis (x = 0) to each bubble
    geom_segment(
      aes(x = 0, xend = Count, y = y_pos, yend = y_pos),
      color = "gray80",
      linewidth = 0.6
    ) +
    # Bubble: size encodes gene count, colour encodes FDR
    geom_point(aes(size = Count, color = p.adjust), alpha = 0.9) +
    # Log10 colour scale for the FDR, limited to the plotted range
    scale_color_gradientn(
      name = "FDR",
      colors = c("darkblue", "steelblue", "lightblue", "lightgreen", "yellow", "orange", "darkred"),
      trans = "log10",
      breaks = scales::trans_breaks("log10", function(x) 10^x),
      labels = scales::trans_format("log10", scales::math_format(10^.x)),
      limits = c(min(plot_data_kegg$p.adjust), max(plot_data_kegg$p.adjust))
    ) +
    # Size legend: breaks at the quartiles and maximum of the gene counts
    scale_size_area(
      name = "Gene count",
      max_size = 18,
      breaks = unique(round(quantile(plot_data_kegg$Count, probs = c(0.25, 0.5, 0.75, 1.0)))),
      labels = unique(round(quantile(plot_data_kegg$Count, probs = c(0.25, 0.5, 0.75, 1.0))))
    ) +
    # Put the pathway descriptions back on the numeric y positions
    scale_y_continuous(
      breaks = plot_data_kegg$y_pos,
      labels = plot_data_kegg$Description
    ) +
    labs(
      title = "KEGG Pathway Enrichment Analysis",
      x = "Gene count",
      y = "KEGG Pathway"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
      axis.title = element_text(size = 14, face = "bold"),
      axis.text.y = element_text(size = 12, face = "bold"),
      legend.position = "right",
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      panel.grid.major.y = element_blank(),  # the segments replace the grid
      panel.grid.minor = element_blank(),
      panel.border = element_rect(colour = "black", fill = NA, linewidth = 1.2)
    )
} else {
  print("No significantly enriched KEGG pathways were found with the criteria provided.")
}
# enrichplot bar chart of the top 10 KEGG pathways
barplot(ekegg, showCategory = 10, title = "KEGG Enrichment Analysis")
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
# Gene-concept network for the top 10 enriched KEGG pathways
cnetplot(
  x = ekegg,
  showCategory = 10
)
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Gene-by-pathway heat plot for the top 10 enriched KEGG pathways
heatplot(
  x = ekegg,
  showCategory = 10
)
# Load the necessary packages
library(GenomicFeatures)
##
## Attaching package: 'GenomicFeatures'
## The following object is masked from 'package:topGO':
##
## genes
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(org.Hs.eg.db)
library(JASPAR2020)
library(TFBSTools)
library(SummarizedExperiment)
library(motifmatchr)
# UCSC hg38 knownGene transcript database
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
# Exons grouped by gene
exons_by_gene <- exonsBy(x = txdb, by = "gene")
# One spanning range per gene: from the first to the last exon position
genes_info <- range(exons_by_gene)
# Transcript-level ranges from the same database
transcripts_info <- transcripts(txdb)
# Translate the symbols of the STRING-mapped genes into Entrez IDs; when a
# symbol has several IDs the first one is used
gene_entrez <- mapIds(
  x = org.Hs.eg.db,
  keys = mapped_genes$Symbol,
  column = "ENTREZID",
  keytype = "SYMBOL",
  multiVals = "first"
)
## 'select()' returned 1:1 mapping between keys and columns
# Keep only the gene ranges whose name is one of the Entrez IDs of interest
promoters_info <- genes_info[names(genes_info) %in% gene_entrez]
# Promoter regions: the 2 kb upstream of each range start, nothing downstream
promoters <- promoters(
  promoters_info,
  upstream = 2000,
  downstream = 0
)
## Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 1 out-of-bound range located on sequence
## chr2_GL383522v1_alt. Note that ranges located on a sequence whose
## length is unknown (NA) or on a circular sequence are not considered
## out-of-bound (use seqlengths() and isCircular() to get the lengths and
## circularity flags of the underlying sequences). You can use trim() to
## trim these ranges. See ?`trim,GenomicRanges-method` for more
## information.
# Keep only ranges on the standard chromosomes
promoters <- keepStandardChromosomes(promoters, pruning.mode = "coarse")
# Clip any region that extends beyond the limits of its chromosome
promoters <- trim(promoters)
# Flatten the list of promoters into a single GRanges object
promoters_gr <- unlist(promoters)
# Load the human motif matrices from the JASPAR2020 database
motifs <- getMatrixSet(JASPAR2020, opts = list(species = "Homo sapiens"))
# Lookup table from motif ID to transcription-factor name.
# vapply() is used instead of sapply() so the result is guaranteed to be a
# character vector with exactly one name per motif.
motif_info <- data.frame(
  motif_id = names(motifs),
  tf_name = vapply(motifs, function(x) x@name, character(1)),
  stringsAsFactors = FALSE
)
# Scan the promoter regions for motif matches.
# NOTE(review): `BSgenome.Hsapiens.UCSC.hg38` is not attached in the library
# calls visible in this section; confirm it is loaded earlier in the file.
motifHits <- matchMotifs(motifs, promoters_gr, genome = BSgenome.Hsapiens.UCSC.hg38)
# Per-promoter count of overlaps with `motifHits`.
# NOTE(review): this counts range overlaps between the promoters and the
# `motifHits` object, which is probably not the number of motif occurrences
# per promoter; verify that this is the intended quantity.
motif_counts <- countOverlaps(promoters_gr, motifHits)
# Store the counts alongside the promoter ranges
promoters_gr$motif_counts <- motif_counts
# Summary of the stored counts
summary(promoters_gr$motif_counts)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.4605 1.0000 1.0000
# ---- Per-motif summary of promoter matches ----------------------------------
# The first assay of `motifHits` is indexed here as promoter regions (rows) by
# motifs (columns); its column names are the motif IDs.
#
# FIX: the previous version summed over ROWS (one value per promoter), found
# that this vector was shorter than the list of motif IDs, truncated the motif
# IDs to make the lengths agree, and paired the two by position. That attached
# each promoter's total to an unrelated motif and then relabelled the rows
# with gene symbols. The summary below is computed per motif (column sums), so
# every count belongs to the motif it is reported with.
motif_names <- names(motifHits)
motif_data <- assays(motifHits)[[1]]
# Motif IDs, one per column of `motif_data`
tf_names <- colnames(motif_data)
# Number of promoter regions in which each motif has a match
motif_counts <- colSums(motif_data)
# Diagnostics: both vectors must have one entry per motif column
motif_data_dims <- dim(motif_data)
tf_names_length <- length(tf_names)
motif_counts_length <- length(motif_counts)
cat("Length of tf_names:", tf_names_length, "\n")
cat("Length of motif_counts:", motif_counts_length, "\n")
cat("Dimensions of motif_data (rows, columns):", motif_data_dims, "\n")
stopifnot(tf_names_length == motif_counts_length)
# One row per motif. The `tf_name` column holds the motif ID (column name kept
# for compatibility with the merge below).
motif_summary <- data.frame(
  tf_name = tf_names,
  motif_count = as.numeric(motif_counts),
  stringsAsFactors = FALSE
)
# Rank motifs by the number of promoters they match and keep the top 121
tf_summary <- motif_summary[order(motif_summary$motif_count, decreasing = TRUE), ]
top_tf_summary <- head(tf_summary, 121) # Adjust the number as required
# Carry the motif ID in its own column so it survives the merge
top_tf_summary$motif_id <- top_tf_summary$tf_name
# Attach the transcription-factor names from `motif_info` (joined on motif ID)
motif1 <- merge(top_tf_summary, motif_info, by.x = "tf_name", by.y = "motif_id")
# Keep and rename the reporting columns
motif2 <- motif1[, c("motif_id", "tf_name.y", "motif_count")]
colnames(motif2) <- c("motif_id", "tf_name", "motif_count")
# Sort by frequency (motif_count) in descending order
motif2 <- motif2 %>% arrange(desc(motif_count))
print(motif2)
# NOTE: any previously rendered copy of this table (gene symbols in the
# `motif_id` column) came from the old position-based pairing and must be
# regenerated.
## motif_id tf_name motif_count
## 1 CCL20 PAX4 231
## 2 CXCL8 BARHL2 205
## 3 CCRL2 PHOX2A 201
## 4 CCR7 SOX9 195
## 5 MMP9 HNF1A 194
## 6 TNFSF10 PDX1 185
## 7 CCND1 NEUROG2 183
## 8 GJB6 MAX::MYC 176
## 9 CXCL2 GATA1::TAL1 173
## 10 PI3 LHX6 173
## 11 CD83 RAX 173
## 12 IFI6 NR2C2 169
## 13 FUT2 MAF::NFE2 167
## 14 IFIH1 POU4F2 165
## 15 NMI PITX3 165
## 16 DKK1 FOSL2 164
## 17 TNFRSF9 DBP 164
## 18 PRF1 MSC 164
## 19 GEM TCF7L2 162
## 20 GZMA SREBF1 162
## 21 GZMK THAP1 162
## 22 PINK1 SRF 161
## 23 TGFBR3 TFCP2 161
## 24 MEFV GRHL1 161
## 25 RPL7 NFIX 161
## 26 KLRB1 FLI1 160
## 27 EFCAB13 TAL1::TCF3 158
## 28 SAMD9 MLX 158
## 29 PLIN5 HNF1B 156
## 30 PLA2G4D ESR2 153
## 31 PPARG MEOX1 153
## 32 RORC NFIA 153
## 33 DDX60 MSX1 152
## 34 IL12B CREB3 151
## 35 MCAM GCM1 151
## 36 FAXDC2 PPARG 149
## 37 MUCL1 RORA(var.2) 149
## 38 IL4 ALX3 149
## 39 SGK1 SP4 149
## 40 CCR2 ZBTB7B 148
## 41 IL17F PBX1 147
## 42 SLC7A11 JUND(var.2) 147
## 43 TRIM22 FOXD1 146
## 44 APOL6 ZBTB18 146
## 45 IL18 ELF5 145
## 46 JUND ETV6 145
## 47 IRS1 EN2 143
## 48 ITGAL ESX1 143
## 49 SERPINB1 PLAG1 142
## 50 OLR1 IRF9 142
## 51 RGPD6 ZBTB7C 142
## 52 IL37 ELK4 141
## 53 DDIT3 NFATC2 140
## 54 S100A8 NHLH1 139
## 55 TBX21 FOXP2 139
## 56 SOST JDP2 139
## 57 PER1 KLF13 139
## 58 BGLAP NKX6-1 139
## 59 SOD2 TBX2 139
## 60 LYZ FOXO3 138
## 61 ABCD2 FOS 138
## 62 GJB2 ZBTB33 138
## 63 NOD2 PAX7 138
## 64 RSAD2 PROP1 138
## 65 CTLA4 NFIC::TLX1 137
## 66 MMP7 HINFP 137
## 67 DUSP4 INSM1 137
## 68 CSF2 REL 136
## 69 FOS JUN 136
## 70 IL36RN STAT1::STAT2 136
## 71 BMP2 SPIC 136
## 72 SYT1 TFAP4 136
## 73 PPARGC1A PAX6 135
## 74 CMTM2 RELA 135
## 75 HYAL4 JUN(var.2) 135
## 76 IL2RA SHOX 135
## 77 CRB1 FOXH1 134
## 78 XAF1 MIXL1 134
## 79 IL21 NEUROD2 134
## 80 GBP5 RORA 133
## 81 SLC51B RXRA::VDR 133
## 82 IL17A ELF4 133
## 83 SCN1A NKX6-2 133
## 84 IL23R NR1H2::RXRA 132
## 85 IL23A JDP2(var.2) 132
## 86 MMP1 GSC 131
## 87 NOS2 IRF8 131
## 88 APOL1 OTX1 131
## 89 ALB ARNT::HIF1A 129
## 90 CYP1A1 CTCF 128
## 91 IL22 ISX 128
## 92 CD69 VSX2 128
## 93 EGF NR4A2 127
## 94 GATA3 POU2F2 127
## 95 SLC7A5 LBX2 127
## 96 CD3E PRRX1 127
## 97 GPR35 IRF1 126
## 98 SPON2 IRF2 126
## 99 ZNF415 MYF6 126
## 100 GZMB SREBF2 125
## 101 CAPS2 NOTO 125
## 102 IFI16 KLF5 123
## 103 IL1F10 MSX2 123
## 104 CD63 VSX1 123
## 105 JDP2 RREB1 122
## 106 CTSK ZNF354C 122
## 107 TYMP RARA::RXRA 122
## 108 NKG7 HSF1 122
## 109 GBP1 SMAD2::SMAD3::SMAD4 122
## 110 CXCR2 BHLHE41 122
## 111 CD38 VENTX 122
## 112 ACKR2 SRY 121
## 113 ICOS YY1 121
## 114 GPT STAT1 121
## 115 FRZB MEF2C 121
## 116 ACP7 FOXI1 120
## 117 CD274 STAT3 120
## 118 IL1B POU6F1 120
## 119 FOSL1 ZIC3 120
## 120 NAMPT FOXF2 119
## 121 LEP FOXL1 118
# Fit the DESeq2 model with the Wald test (contrast-specific results are
# extracted afterwards); size factors use the "poscount" estimator
dsd <- DESeq(
  object = dsa,
  test = "Wald",
  sfType = "poscount"
)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Group labels of the fitted model. These MUST be read before results() is
# called: the contrast below is built from them, and the previous ordering
# silently reused whatever `groups` was left over from an earlier section.
groups <- levels(colData(dsd)$Group)
stopifnot(length(groups) >= 2)
# Contrast: second level versus first level of `Group`, FDR-adjusted
r <- results(dsd, contrast = c("Group", groups[2], groups[1]), alpha = 0.05, pAdjustMethod = "fdr")
df <- as.data.frame(r)
# Filter only significant genes (padj < 0.05 and |log2FC| >= 1)
sig_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) >= 1)
# ---- Base-graphics volcano and MD plots of the significant genes ------------
# Two-colour palette: colour 1 = down-regulated, colour 2 = up-regulated.
# palette() returns the previous palette so it can be restored at the end.
old.pal <- palette(c("#00BFFF", "#FF3030")) # low-hi colors
# par() likewise returns the previous settings; they are restored at the end
old.par <- par(mar = c(4, 4, 2, 1), cex.main = 1.5)

# Volcano plot (significant genes only)
with(sig_genes, {
  plot(log2FoldChange, -log10(padj), main = paste(groups[2], "vs", groups[1]),
       xlab = "log2FC", ylab = "-log10(Padj)", pch = 20, cex = 0.5)
  # FIX: labels must come from `sig_genes`, the object whose coordinates are
  # plotted. Using rownames(r) attached the names of the first rows of the
  # full result table to the wrong points.
  text(log2FoldChange, -log10(padj), labels = rownames(sig_genes), cex = 0.6, pos = 4)
})
# Overlay the same genes coloured by direction: (sign + 3) / 2 maps a negative
# fold change to colour 1 and a positive one to colour 2
with(subset(r, padj < 0.05 & abs(log2FoldChange) >= 1),
     points(log2FoldChange, -log10(padj), pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1))
legend("bottomleft", title = paste("Padj<", 0.05, sep = ""), legend = c("down", "up"), pch = 20, col = 1:2)

# MD plot (significant genes only): mean normalised count vs. fold change
par(mar = c(4, 4, 2, 1), cex.main = 1.5)
with(sig_genes, {
  plot(log10(baseMean), log2FoldChange,
       main = paste(groups[2], "vs", groups[1]),
       xlab = "log10(mean of normalized counts)", ylab = "log2FoldChange",
       pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1)
  text(log10(baseMean), log2FoldChange, labels = rownames(sig_genes), cex = 0.6, pos = 4)
})
legend("bottomleft", title = paste("Padj<", 0.05, sep = ""), legend = c("down", "up"), pch = 20, col = 1:2)
abline(h = 0)

par(old.par)     # restore graphical parameters
palette(old.pal) # restore palette
# Add gene symbols to the points on the graph
# Install the packages if you do not already have them
library(ggplot2)
library(ggrepel)
#' Labelled volcano plot of differential-expression results
#'
#' Classifies each row as "Up", "Down" or "NS" from its adjusted p-value and
#' log2 fold change, then draws log2 fold change against -log10(padj) with the
#' non-"NS" rows labelled by row name.
#'
#' @param res Table with `log2FoldChange` and `padj` columns and gene names as
#'   row names. It is coerced with `as.data.frame()` before plotting.
#' @param title Plot title.
#' @param padj_cutoff Adjusted p-value threshold (default 0.05, the value that
#'   was previously hard-coded).
#' @param lfc_cutoff Absolute log2 fold-change threshold (default 1, the value
#'   that was previously hard-coded).
#' @return A ggplot object.
plotVolcano <- function(res, title = "Volcano Plot", padj_cutoff = 0.05, lfc_cutoff = 1) {
  # ggplot() needs a plain data frame
  res <- as.data.frame(res)
  # Rows with a missing padj or fold change are never called significant
  up <- which(res$padj < padj_cutoff & res$log2FoldChange > lfc_cutoff)
  down <- which(res$padj < padj_cutoff & res$log2FoldChange < -lfc_cutoff)
  res$group <- "NS"
  res$group[up] <- "Up"
  res$group[down] <- "Down"
  # Only the significant genes get a text label
  res$label <- ifelse(res$group != "NS", rownames(res), NA)
  ggplot(res, aes(x = log2FoldChange, y = -log10(padj), color = group)) +
    geom_point(alpha = 0.7, size = 2) +
    # Dashed guides at the significance and fold-change thresholds
    geom_hline(yintercept = -log10(padj_cutoff), linetype = "dashed") +
    geom_vline(xintercept = c(-lfc_cutoff, lfc_cutoff), linetype = "dashed") +
    scale_color_manual(values = c("Up" = "firebrick", "Down" = "dodgerblue", "NS" = "grey80")) +
    ggrepel::geom_text_repel(aes(label = label), size = 3, max.overlaps = Inf) +
    theme_minimal() +
    labs(title = title, x = "log2 Fold Change", y = "-log10 adjusted p-value", color = "Regulation")
}
# Example of function usage
plotVolcano(r, paste(groups[2], "vs", groups[1]))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 280 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
# Up-regulated genes (padj < 0.05 and log2FC >= 1)
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange >= 1))
# Down-regulated genes (padj < 0.05 and log2FC <= -1)
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange <= -1))
# All DEGs, i.e. padj < 0.05 and |log2FC| >= 1
de_genes <- c(up_genes, down_genes)
# Expression values restricted to the DEGs; drop = FALSE keeps the table
# two-dimensional even when a single gene matches
expr_datasa <- as.data.frame(expr_data[rownames(expr_data) %in% de_genes, , drop = FALSE])
# Convert expression data to long format for ggplot2: one row per gene/sample
expr_datas <- as.data.frame(expr_datasa)
expr_datas$Gene <- rownames(expr_datas)
expr_datas <- pivot_longer(expr_datas, cols = -Gene, names_to = "Sample", values_to = "Expression")
# Attach the phenotype columns to every sample
expr_datas <- merge(expr_datas, pheno_data, by.x = "Sample", by.y = "geo_accession")
# Up to 429 result rows with the smallest adjusted p-values. head() is used
# instead of `[1:429]` so that a result table with fewer rows does not produce
# NA indices.
expresse <- r[head(order(r$padj), 429), ]
# Join the statistics with the DEG expression values by row name (gene),
# keeping the p-value ordering
expresse <- merge(as.data.frame(expresse), expr_datasa, by = 0, sort = FALSE)
library(dplyr)
library(tibble)
library(clusterProfiler)
library(ggplot2)
# Plain data-frame copy of the DESeq2 results
r_df <- as.data.frame(r)
# Move the row names into a `Gene` column so they survive the dplyr verbs
r_df_com_genes <- rownames_to_column(r_df, var = "Gene")
# The 30 significant genes (padj < 0.05, |log2FC| > 1) with the largest
# absolute fold change, tagged with their direction of regulation
top30_fc <- r_df_com_genes %>%
  dplyr::filter(padj < 0.05, abs(log2FoldChange) > 1) %>%
  dplyr::arrange(dplyr::desc(abs(log2FoldChange))) %>%
  head(30) %>%
  dplyr::mutate(
    # After the filter every fold change is either > 1 or < -1
    Regulation = dplyr::if_else(log2FoldChange > 1, "Upregulated", "Downregulated")
  )
# Horizontal bar chart with positive and negative bars, genes ordered by
# fold change
ggplot(
  top30_fc,
  aes(x = reorder(Gene, log2FoldChange), y = log2FoldChange, fill = Regulation)
) +
  geom_col() +
  coord_flip() +  # genes end up on the vertical axis
  scale_fill_manual(
    values = c("Upregulated" = "steelblue", "Downregulated" = "tomato")
  ) +
  scale_y_continuous(breaks = seq(-8, 4, by = 1)) +  # adjust if the range changes
  labs(
    title = "The 30 most prominent DEGs between lesion PsA and control (GSE205748)",
    x = "Gene",
    y = "log2 Fold Change",
    fill = "Regulation"
  ) +
  theme_bw() +
  theme(
    axis.text.y = element_text(color = "black", face = "bold", size = 9)
  )
# --- The 20 genes with the largest positive fold change ---
up_20genes <- expresse %>%
  filter(padj < 0.05, log2FoldChange > 1) %>%
  arrange(desc(log2FoldChange)) %>%
  head(20) %>%
  pull(Row.names)
# --- Long-format expression restricted to those genes and the two groups ---
expr_datas_filtrado_up <- expr_datas %>%
  filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin lesion", "Healthy control skin"),
    Gene %in% up_20genes
  )
# --- Mean and standard deviation per gene and per group ---
expr_datasa_up <- expr_datas_filtrado_up %>%
  group_by(Gene, `tissue type:ch1`) %>%
  summarise(
    mean_expression = mean(Expression, na.rm = TRUE),
    sd_expression = sd(Expression, na.rm = TRUE),
    .groups = "drop"
  )
# --- Data for the chart: mean expression in the lesion group only ---
dados_para_plotar <- expr_datasa_up %>%
  filter(`tissue type:ch1` == "Psoriatic arthritis skin lesion")
# --- Horizontal bar chart, genes ordered by mean expression ---
ggplot(
  dados_para_plotar,
  aes(x = mean_expression, y = reorder(Gene, mean_expression))
) +
  geom_col(fill = "steelblue", color = "black", width = 0.7) +
  labs(
    title = "Average Expression of the Main Upregulated Genes in Psoriatic arthritis skin lesion",
    subtitle = "For lesion PsA vs control (GSE205748)",
    x = "Average Expression in the Group 'skin lesion'",
    y = "Gene"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.y = element_text(size = 12),
    panel.grid.major.y = element_blank()
  )
# --- The 20 genes with the most negative fold change ---
down_20genes <- expresse %>%
  filter(padj < 0.05, log2FoldChange < -1) %>%
  arrange(log2FoldChange) %>%  # most negative first
  head(20) %>%
  pull(Row.names)
# --- Long-format expression restricted to those genes and the two groups ---
expr_datas_filtrado_down <- expr_datas %>%
  filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin lesion", "Healthy control skin"),
    Gene %in% down_20genes
  )
# --- Mean and standard deviation per gene and per group ---
expr_datasa_down <- expr_datas_filtrado_down %>%
  group_by(Gene, `tissue type:ch1`) %>%
  summarise(
    mean_expression = mean(Expression, na.rm = TRUE),
    sd_expression = sd(Expression, na.rm = TRUE),
    .groups = "drop"
  )
# --- Data for the chart: mean expression in the lesion group only ---
dados_para_plotar <- expr_datasa_down %>%
  filter(`tissue type:ch1` == "Psoriatic arthritis skin lesion")
# --- Horizontal bar chart, genes ordered by decreasing mean expression ---
ggplot(
  dados_para_plotar,
  aes(x = mean_expression, y = reorder(Gene, -mean_expression))
) +
  geom_col(fill = "red", color = "black", width = 0.7) +
  labs(
    title = "Average Expression of the Main Downregulated Genes in Psoriatic arthritis skin lesion",
    subtitle = "For lesion PsA vs control (GSE205748)",
    x = "Average Expression in the Group 'skin lesion'",
    y = "Gene"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.y = element_text(size = 12),
    panel.grid.major.y = element_blank()
  )
# --- Build the gene x sample expression matrix used by the heatmap ---
# DEGs with strict thresholds (padj < 0.05 and |log2FC| > 1)
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange > 1))
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange < -1))
genes_interesse <- c(up_genes, down_genes)
# Long-format expression for those genes in the two groups being compared
expr_filtrado <- expr_datas %>%
  filter(`tissue type:ch1` %in% c("Psoriatic arthritis skin lesion", "Healthy control skin")) %>%
  filter(Gene %in% genes_interesse)
# Back to wide format: one row per gene, one column per sample, with missing
# gene/sample combinations filled with 0.
# dplyr::select is namespaced because AnnotationDbi (attached with
# org.Hs.eg.db) also exports a select() that can mask it.
matriz_contagem <- expr_filtrado %>%
  dplyr::select(Gene, Sample, Expression) %>%
  pivot_wider(names_from = Sample, values_from = Expression, values_fill = 0) %>%
  column_to_rownames(var = "Gene")
# Keep only the `sample_info` rows whose sample is a column of the matrix, so
# the annotation corresponds exactly to the heatmap data
sample_info_filtrado <- subset(sample_info, rownames(sample_info) %in% colnames(matriz_contagem))
# Select the 20 genes with the highest mean expression across samples
media_dos_genes <- rowMeans(matriz_contagem)
media_ordenada <- sort(media_dos_genes, decreasing = TRUE)
top_20_genes_por_media <- names(head(media_ordenada, 20))
heatmapData_top20 <- as.matrix(matriz_contagem[top_20_genes_por_media, ])
# Column annotation: the group of every retained sample
annotation_data <- data.frame(
  Group = sample_info_filtrado$Group,
  row.names = rownames(sample_info_filtrado)
)
# Put the heatmap columns in the same order as the annotation rows;
# drop = FALSE keeps a matrix even if only one sample remains
heatmapData_top20 <- heatmapData_top20[, rownames(annotation_data), drop = FALSE]
# --- ComplexHeatmap of the selected genes ---
# Row-wise Z-score: scale() standardises columns, hence the double transpose
mat_scaled <- t(scale(t(heatmapData_top20)))
# Column annotation bar showing the group of each sample
ha_col <- HeatmapAnnotation(
  Group = annotation_data$Group,
  col = list(
    Group = c("lesion.PsA" = "#1f77b4", "control" = "#d62728")
  )
)
# Colour function over Z-scores -2 / 0 / 2 built from the reversed 3-class
# "RdBu" ColorBrewer palette
cores <- colorRamp2(
  c(-2, 0, 2),
  rev(RColorBrewer::brewer.pal(3, "RdBu"))
)
# Draw the heatmap; columns are split by group and clustered within each split
Heatmap(
  mat_scaled,
  name = "Z-score",
  col = cores,
  top_annotation = ha_col,
  column_split = annotation_data$Group,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_row_names = TRUE,
  show_column_names = FALSE,
  column_title = "Heatmap of the 20 most highly expressed DEGs (lesion PsA vs. control GSE205748)",
  heatmap_legend_param = list(title = "Z-score")
)
# Filter differentially expressed genes (padj < 0.05 and |log2FC| > 1)
de_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) > 1)
# Convert the filtered results to a plain data frame
de_genes <- data.frame(de_genes)
# Keep only the fold change and the adjusted p-value.
# dplyr::select is namespaced because AnnotationDbi (attached with
# org.Hs.eg.db) also exports a select() that can mask it.
de_genes <- de_genes %>% dplyr::select(log2FoldChange, padj)
# The row names of the results are used as gene symbols
gene_symbols <- rownames(de_genes)
# Convert gene symbols to Entrez IDs
gene_entrez_ids <- bitr(gene_symbols, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
## 'select()' returned 1:1 mapping between keys and columns
# Expose the row names as a `Symbol` column so they can be used as join key
de_genes$Symbol <- rownames(de_genes)
# Left join: every DEG is kept; genes without an Entrez ID get NA
de_genes <- merge(de_genes, gene_entrez_ids, by.x = "Symbol", by.y = "SYMBOL", all.x = TRUE)
# Ensure unique symbols and unique (non-missing) Entrez IDs.
# FIX: `!duplicated(ENTREZID)` alone treated every NA after the first as a
# duplicate, discarding all but one of the genes without an Entrez mapping,
# and did not guarantee the unique symbols that the downstream STRING mapping
# relies on.
de_genes <- de_genes[
  !duplicated(de_genes$Symbol) &
    (is.na(de_genes$ENTREZID) | !duplicated(de_genes$ENTREZID)),
]
# Protein-protein interaction network of the DEGs via STRINGdb.
# STRING v12, human (NCBI taxon 9606), score threshold 400.
string_db <- STRINGdb$new(
  version = "12",
  species = 9606,
  score_threshold = 400,
  input_directory = ""
)
# Raise the timeout option to 10 minutes for the calls that follow.
options(timeout = 600)

# Attach a STRING identifier to every gene symbol; rows that cannot be mapped
# are removed from the result.
mapped_genes <- string_db$map(
  de_genes,
  "Symbol",
  removeUnmappedRows = TRUE
)
## Warning: we couldn't map to STRING 2% of your identifiers

# Retrieve the interactions among the mapped proteins.
interactions <- string_db$get_interactions(mapped_genes$STRING_id)

# Build an undirected igraph object from the interaction table and plot it.
library(igraph)
g <- graph_from_data_frame(d = interactions, directed = FALSE)
plot(g)

# Community detection with the Louvain algorithm.
clusters <- cluster_louvain(g)
# Same network again, vertices coloured by community membership.
plot(g, vertex.color = clusters[["membership"]])
# Gene Ontology over-representation analysis of the STRING-mapped DEGs.
# Input IDs are gene symbols; the ontology is Biological Process ("BP";
# "MF" and "CC" are the other options). P-values are adjusted with
# Benjamini-Hochberg.
enrich_result <- enrichGO(
  gene = mapped_genes$Symbol,
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2,
  pAdjustMethod = "BH"
)

# Bar chart of the top 10 enriched terms.
barplot(
  enrich_result,
  showCategory = 10,
  title = "GO Enrichment Analysis"
)
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?

# Gene-concept network linking the top 10 terms to their genes.
cnetplot(enrich_result, showCategory = 10)
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Gene-by-term heatmap for the top 10 terms.
heatplot(enrich_result, showCategory = 10)
# KEGG pathway over-representation analysis on the Entrez IDs of the
# STRING-mapped DEGs (organism "hsa", Benjamini-Hochberg adjustment).
#
# All KEGG plots are drawn inside the same guard: when no pathway is enriched
# a message is printed and no plotting function is called on an empty result.
# Plots are wrapped in print() because values are not auto-printed inside a
# braced block.
ekegg <- enrichKEGG(
  gene = mapped_genes$ENTREZID,
  organism = "hsa",
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05
)

kegg_table <- if (is.null(ekegg)) data.frame() else as.data.frame(ekegg)

if (nrow(kegg_table) == 0) {
  print("No significantly enriched KEGG pathways were found with the criteria provided.")
} else {
  # The 15 most significant pathways, ordered by adjusted p-value (FDR).
  plot_data_kegg <- kegg_table %>%
    arrange(p.adjust) %>%
    head(15)
  # Numeric Y position per pathway; reversed so the most significant pathway
  # is drawn at the top.
  plot_data_kegg$y_pos <- rev(seq_along(plot_data_kegg$Description))

  # Legend breaks for bubble size: rounded quartiles of the gene counts.
  # Computed once and reused for both breaks and labels.
  count_breaks <- unique(round(
    quantile(plot_data_kegg$Count, probs = c(0.25, 0.5, 0.75, 1.0))
  ))

  # Lollipop/bubble chart: X = gene count, bubble size = gene count,
  # colour = FDR on a log10 scale.
  kegg_bubble <- ggplot(plot_data_kegg, aes(x = Count, y = y_pos)) +
    # Connector line from the Y axis to each bubble.
    geom_segment(
      aes(x = 0, xend = Count, y = y_pos, yend = y_pos),
      color = "gray80", linewidth = 0.6
    ) +
    geom_point(aes(size = Count, color = p.adjust), alpha = 0.9) +
    scale_color_gradientn(
      colors = c(
        "darkblue", "steelblue", "lightblue", "lightgreen",
        "yellow", "orange", "darkred"
      ),
      name = "FDR",
      trans = "log10",
      # Breaks at powers of ten, labelled in scientific notation.
      breaks = scales::trans_breaks("log10", function(x) 10^x),
      labels = scales::trans_format("log10", scales::math_format(10^.x)),
      # Limit the colour scale to the range present in the plotted data.
      limits = range(plot_data_kegg$p.adjust)
    ) +
    scale_size_area(
      max_size = 18,
      name = "Gene count",
      breaks = count_breaks,
      labels = count_breaks
    ) +
    # Map the numeric Y positions back to the pathway descriptions.
    scale_y_continuous(
      breaks = plot_data_kegg$y_pos,
      labels = plot_data_kegg$Description
    ) +
    labs(
      title = "KEGG Pathway Enrichment Analysis",
      x = "Gene count",
      y = "KEGG Pathway"
    ) +
    theme_minimal() +
    theme(
      axis.text.y = element_text(size = 12, face = "bold"),
      axis.title = element_text(size = 14, face = "bold"),
      plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
      legend.position = "right",
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(colour = "black", fill = NA, linewidth = 1.2)
    )
  print(kegg_bubble)

  # Bar chart of the top 10 pathways.
  print(barplot(ekegg, showCategory = 10, title = "KEGG Enrichment Analysis"))
  ## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
  ## ✖ Problematic argument:
  ## • by = x
  ## ℹ Did you misspell an argument name?

  # Gene-concept network linking the top 10 pathways to their genes.
  print(cnetplot(ekegg, showCategory = 10))
  ## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
  ## increasing max.overlaps

  # Gene-by-pathway heatmap for the top 10 pathways.
  print(heatplot(ekegg, showCategory = 10))
}
# Scan the promoters of the STRING-mapped DEGs for human JASPAR 2020 motifs.
#
# Reads:  mapped_genes (column Symbol)
# Writes: txdb, exons_by_gene, genes_info, transcripts_info, gene_entrez,
#         promoters_info, promoters, promoters_gr, motifs, motif_info,
#         motifHits, motif_counts

# Load the TxDb database
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
# Exons grouped by gene
exons_by_gene <- exonsBy(txdb, by = "gene")
# Gene coordinates, taken as the range spanned by each gene's exons
genes_info <- range(exons_by_gene)
# Transcript annotation (not used further in this step)
transcripts_info <- transcripts(txdb)
# Map the gene symbols to Entrez IDs; genes_info is filtered by its names
# against these IDs below.
gene_entrez <- mapIds(
  org.Hs.eg.db,
  keys = mapped_genes$Symbol,
  column = "ENTREZID",
  keytype = "SYMBOL",
  multiVals = "first"
)
## 'select()' returned 1:1 mapping between keys and columns
# Keep only the genes of interest
promoters_info <- subset(genes_info, names(genes_info) %in% gene_entrez)
# Promoter regions: 2 kb upstream of the range start, nothing downstream
promoters <- promoters(promoters_info, upstream = 2000, downstream = 0)
# Keep the standard chromosomes only
promoters <- keepStandardChromosomes(promoters, pruning.mode = "coarse")
# Clip regions that extend beyond the chromosome ends
promoters <- trim(promoters)
# Flatten the GRangesList of promoters into a single GRanges
promoters_gr <- unlist(promoters)

# Load the human motifs from the JASPAR 2020 database
motifs <- getMatrixSet(JASPAR2020, opts = list(species = "Homo sapiens"))
# Lookup table: JASPAR motif ID -> transcription factor name.
# vapply() guarantees a character vector whatever the size of the motif set.
motif_info <- data.frame(
  motif_id = names(motifs),
  tf_name = vapply(motifs, function(x) x@name, character(1)),
  stringsAsFactors = FALSE
)

# Match every motif against every promoter sequence
motifHits <- matchMotifs(
  motifs, promoters_gr,
  genome = BSgenome.Hsapiens.UCSC.hg38
)
# Per-promoter count = number of distinct motifs with at least one match in
# that promoter (row sums of the logical match matrix). countOverlaps() against
# motifHits is not suitable here: the ranges of motifHits are the promoters
# themselves, so it would only count each promoter's overlap with itself.
motif_counts <- rowSums(motifMatches(motifHits))
# Store the counts as a metadata column of the promoter ranges
promoters_gr$motif_counts <- motif_counts
# Summary of the per-promoter motif counts
summary(promoters_gr$motif_counts)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.4965 1.0000 1.0000
# Rank JASPAR motifs (transcription factors) by the number of DEG promoters in
# which they have at least one match.
#
# Reads:  motifHits, motif_info
# Writes: motif_names, motif_data, tf_names, motif_counts, tf_names_length,
#         motif_counts_length, motif_data_dims, motif_summary, tf_summary,
#         top_tf_summary, motif1, motif2

motif_names <- names(motifHits)
# First assay of the matchMotifs() result: the motif-match matrix, with one
# row per promoter and one column per motif (see the dimensions printed below).
motif_data <- assays(motifHits)[[1]]
dim(motif_data) # promoters x motifs
## [1] 141 633
# Column names are the JASPAR motif IDs
tf_names <- colnames(motif_data)
# Per-motif count. This has to be a COLUMN sum: a row sum gives one value per
# promoter, which cannot be paired with motif IDs. Truncating one vector to the
# length of the other would attach promoter totals to unrelated motifs.
motif_counts <- colSums(motif_data)

# Diagnostics: both lengths must equal the number of motifs (columns)
tf_names_length <- length(tf_names)
motif_counts_length <- length(motif_counts)
cat("Length of tf_names:", tf_names_length, "\n")
cat("Length of motif_counts:", motif_counts_length, "\n")
motif_data_dims <- dim(motif_data)
cat("Dimensions of motif_data (rows, columns):", motif_data_dims, "\n")
## Dimensions of motif_data (rows, columns): 141 633
stopifnot(tf_names_length == motif_counts_length)

# One row per motif: JASPAR ID (column tf_name) and number of promoters hit
motif_summary <- data.frame(
  tf_name = tf_names,
  motif_count = unname(motif_counts),
  stringsAsFactors = FALSE
)
# Sort by count and keep the top motifs
tf_summary <- motif_summary[order(motif_summary$motif_count, decreasing = TRUE), ]
top_tf_summary <- head(tf_summary, 130) # Adjust the number as required.
# Keep the JASPAR ID in its own column; the merge below uses tf_name as key
top_tf_summary$motif_id <- top_tf_summary$tf_name
# Attach the transcription factor name; it arrives as column tf_name.y
motif1 <- merge(top_tf_summary, motif_info, by.x = "tf_name", by.y = "motif_id")
# merge() sorts by key, so restore the ranking by count
motif1 <- motif1[order(motif1$motif_count, decreasing = TRUE), ]
# Final table: JASPAR motif ID, TF name, number of promoters with a match
motif2 <- motif1[, c("motif_id", "tf_name.y", "motif_count")]
rownames(motif2) <- NULL
# View
print(motif2)
## motif_id tf_name.y motif_count
## 1 VIM TBXT 176
## 2 MMP1 EN1 131
## 3 NAMPT FOXF2 117
## 4 TRIM22 FOXD1 144
## 5 OLR1 FOXL1 139
## 6 NOS2 FOXI1 131
## 7 PPARG HNF1A 153
## 8 SELL NHLH1 127
## 9 GZMB IRF1 125
## 10 SPON2 IRF2 127
## 11 GJB6 MAX::MYC 176
## 12 PPARGC1A PPARG 135
## 13 SOD2 PAX4 137
## 14 IL17F PAX6 147
## 15 GBP5 PBX1 133
## 16 MUCL1 RORA 148
## 17 CCR7 RORA(var.2) 197
## 18 ACKR2 RREB1 120
## 19 EFCAB13 RXRA::VDR 156
## 20 ICOS ELK4 121
## 21 PPARGC1B SOX9 138
## 22 CTLA4 SRY 137
## 23 CTSK TAL1::TCF3 123
## 24 IFIT3 YY1 115
## 25 CYP1A1 REL 128
## 26 ZNF483 RELA 112
## 27 KANK4 NR1H2::RXRA 117
## 28 DDIT3 NFIC::TLX1 139
## 29 TYMP ZNF354C 121
## 30 TOMM7 HINFP 116
## 31 CD36 PDX1 181
## 32 MEFV ELF5 159
## 33 HIF1A STAT1 152
## 34 ALB REST 131
## 35 EGF CTCF 127
## 36 IFI16 GATA1::TAL1 122
## 37 HK2 STAT3 126
## 38 EOMES TFCP2 111
## 39 SERPINB1 EWSR1-FLI1 143
## 40 FCGR1A NFATC2 111
## 41 XAF1 HNF1B 133
## 42 FCGR3A INSM1 155
## 43 IL22 FOXO3 128
## 44 DKK1 RARA::RXRA 164
## 45 CRB1 NR4A2 133
## 46 FOS PLAG1 138
## 47 TBX21 ESR2 141
## 48 HYAL4 ARNT::HIF1A 134
## 49 SLC7A11 DUX4 145
## 50 MX1 FLI1 113
## 51 FRZB FOS 121
## 52 FUT2 FOSL2 166
## 53 IFI6 FOXH1 167
## 54 HERC6 HSF1 122
## 55 PTPN22 JUN 113
## 56 GATA3 JUN(var.2) 126
## 57 GBP1 JUND(var.2) 121
## 58 IL36RN MEF2C 135
## 59 GEM MAF::NFE2 158
## 60 GJB2 NR2C2 138
## 61 IL37 NRF1 140
## 62 PLA2G4D POU2F2 154
## 63 GPR35 SMAD2::SMAD3::SMAD4 126
## 64 GPT STAT1::STAT2 120
## 65 CD274 TCF7L2 122
## 66 CXCL2 ZBTB33 170
## 67 IFNG FOXP2 113
## 68 IL1B SREBF2 119
## 69 IL2RA THAP1 135
## 70 IL4 KLF5 149
## 71 CXCL8 DMRT3 207
## 72 CXCR2 FOXG1 122
## 73 IL12B NFATC3 153
## 74 TNFRSF9 POU6F1 160
## 75 IL17A SHOX 133
## 76 IL18 ALX3 144
## 77 CXCL10 BARHL2 114
## 78 JUND BHLHE41 146
## 79 KLRB1 CENPB 159
## 80 ACP7 CREB3 120
## 81 LEP DBP 119
## 82 LYZ ELF4 137
## 83 MMP7 ESX1 135
## 84 MMP9 ETV6 194
## 85 PER1 GCM1 138
## 86 PI3 GRHL1 168
## 87 ACP5 GSC 161
## 88 SMOX HEY2 128
## 89 SAMD9 HOXC11 157
## 90 PRF1 IRF8 166
## 91 DDX60 IRF9 149
## 92 ZNF415 ISX 125
## 93 ADAMTS9 JDP2 158
## 94 IL21 JDP2(var.2) 134
## 95 CCND1 KLF13 183
## 96 RORC LHX6 153
## 97 RPL7 MEF2B 160
## 98 RPL15 MEOX1 154
## 99 RPL41 MIXL1 124
## 100 RPS7 MLX 126
## 101 RPS19 MLXIPL 138
## 102 RPS21 MSC 130
## 103 S100A8 MSX1 135
## 104 S100A12 NEUROD2 115
## 105 BGLAP NEUROG2 139
## 106 CCL20 NFIA 237
## 107 PRDM1 NFIX 154
## 108 NOD2 NKX2-3 139
## 109 IFIH1 NKX2-8 169
## 110 CSMD1 NKX6-1 108
## 111 BMP2 NKX6-2 138
## 112 SPP1 PAX7 111
## 113 STAT1 POU4F2 108
## 114 STAT3 SP4 136
## 115 SYT1 SPDEF 134
## 116 TGFBR3 SPIC 160
## 117 ZC3H12A TBX2 113
## 118 FOSL1 TBX20 120
## 119 APOL6 TBX21 145
## 120 SLC7A5 TFAP4 126
## 121 EFCAB7 TFEB 114
## 122 IL1F10 ZBTB7B 126
## 123 CAPS2 ZBTB7C 125
## 124 APOL1 ZIC1 130
## 125 RSAD2 ZIC3 137
## 126 CD3E ZBTB18 128
## 127 OSMR LBX2 141
## 128 CD28 MSX2 111
## 129 CD38 PHOX2A 121
## 130 CD63 PITX3 124
# Wald test to obtain contrast-specific results
dsd <- DESeq(dsa, test = "Wald", sfType = "poscount")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing

# Group levels of the fitted object. They must be read BEFORE results() is
# called, because the contrast below is built from them; this also keeps the
# contrast and the plot titles (which use the same `groups`) in agreement.
groups <- levels(colData(dsd)$Group)
# The contrast uses the third and the first level, so both must exist.
stopifnot(length(groups) >= 3)

# Third group level versus first group level, FDR-adjusted, alpha = 0.05
r <- results(
  dsd,
  contrast = c("Group", groups[3], groups[1]),
  alpha = 0.05,
  pAdjustMethod = "fdr"
)
df <- as.data.frame(r)
# Significant genes only: padj < 0.05 and |log2FC| >= 1
sig_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) >= 1)
# Base-graphics volcano plot and MD plot of the significant genes
# (sig_genes: padj < 0.05 and |log2FC| >= 1).
#
# Palette index 1 = down-regulated, 2 = up-regulated, via
# (sign(log2FoldChange) + 3) / 2. The previous palette and graphical parameters
# are saved here and restored at the end.
old.pal <- palette(c("#00BFFF", "#FF3030")) # low-hi colors
old.par <- par(mar = c(4, 4, 2, 1), cex.main = 1.5)

# volcano plot
with(sig_genes, {
  plot(log2FoldChange, -log10(padj),
    main = paste(groups[3], "vs", groups[1]),
    xlab = "log2FC", ylab = "-log10(Padj)", pch = 20, cex = 0.5
  )
  # Labels must come from the same object as the coordinates. Using the row
  # names of the full result table would attach the wrong gene names and
  # recycle the coordinates to the longer label vector.
  text(log2FoldChange, -log10(padj),
    labels = rownames(sig_genes), cex = 0.6, pos = 4
  )
})
# Overdraw the points coloured by direction of change
with(
  sig_genes,
  points(log2FoldChange, -log10(padj),
    pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1
  )
)
legend("bottomleft",
  title = paste0("Padj<", 0.05), legend = c("down", "up"),
  pch = 20, col = 1:2
)

# Plot only significant genes
# MD PLOT
par(mar = c(4, 4, 2, 1), cex.main = 1.5)
with(sig_genes, {
  plot(log10(baseMean), log2FoldChange,
    main = paste(groups[3], "vs", groups[1]),
    xlab = "log10(mean of normalized counts)", ylab = "log2FoldChange",
    pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1
  )
  text(log10(baseMean), log2FoldChange,
    labels = rownames(sig_genes), cex = 0.6, pos = 4
  )
})
legend("bottomleft",
  title = paste0("Padj<", 0.05), legend = c("down", "up"),
  pch = 20, col = 1:2
)
abline(h = 0)

palette(old.pal) # restore palette
par(old.par) # restore graphical parameters
# Add gene symbols to the points on the graph
# Install the packages if you do not already have them
library(ggplot2)
library(ggrepel)
#' Volcano plot of differential-expression results
#'
#' Classifies every gene as "Up", "Down" or "NS" (not significant) from its
#' adjusted p-value and log2 fold change, and draws log2 fold change against
#' -log10(adjusted p-value). Genes classified as "Up" or "Down" are labelled
#' with their row name.
#'
#' @param res Result table with columns `log2FoldChange` and `padj` and gene
#'   names as row names. Anything `as.data.frame()` can convert is accepted,
#'   so the plot does not depend on ggplot2 being able to handle the input
#'   class directly.
#' @param title Plot title.
#' @param padj_cutoff Adjusted p-value threshold (default 0.05).
#' @param lfc_cutoff Absolute log2 fold change threshold (default 1).
#' @return A ggplot object.
plotVolcano <- function(res, title = "Volcano Plot",
                        padj_cutoff = 0.05, lfc_cutoff = 1) {
  res <- as.data.frame(res)

  # Genes with a missing padj or fold change can never be significant; which()
  # drops the resulting NA comparisons instead of using them as subscripts.
  is_up <- which(res$padj < padj_cutoff & res$log2FoldChange > lfc_cutoff)
  is_down <- which(res$padj < padj_cutoff & res$log2FoldChange < -lfc_cutoff)

  res$group <- "NS"
  res$group[is_up] <- "Up"
  res$group[is_down] <- "Down"
  # Only the significant genes get a text label
  res$label <- ifelse(res$group != "NS", rownames(res), NA_character_)

  ggplot(res, aes(x = log2FoldChange, y = -log10(padj), color = group)) +
    geom_point(alpha = 0.7, size = 2) +
    # Dashed guides at the significance and fold-change thresholds
    geom_hline(yintercept = -log10(padj_cutoff), linetype = "dashed") +
    geom_vline(xintercept = c(-lfc_cutoff, lfc_cutoff), linetype = "dashed") +
    scale_color_manual(
      values = c("Up" = "firebrick", "Down" = "dodgerblue", "NS" = "grey80")
    ) +
    ggrepel::geom_text_repel(aes(label = label), size = 3, max.overlaps = Inf) +
    theme_minimal() +
    labs(
      title = title,
      x = "log2 Fold Change",
      y = "-log10 adjusted p-value",
      color = "Regulation"
    )
}
# Volcano plot for the current contrast
plotVolcano(r, paste(groups[3], "vs", groups[1]))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 428 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

# Up-regulated genes: padj < 0.05 and log2FC >= 1
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange >= 1))
# Down-regulated genes: padj < 0.05 and log2FC <= -1
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange <= -1))
# All DEGs, i.e. |log2FC| >= 1
de_genes <- c(up_genes, down_genes)

# Expression values of the DEGs. drop = FALSE keeps the genes x samples layout
# even when exactly one gene matches; without it a single matching row of a
# matrix collapses to a plain vector and the reshaping below no longer sees
# genes as rows.
expr_datasa <- as.data.frame(
  expr_data[rownames(expr_data) %in% de_genes, , drop = FALSE]
)

# Long format for ggplot2: one row per gene/sample pair, joined with the
# sample phenotype data through the GEO accession.
expr_datas <- as.data.frame(expr_datasa)
expr_datas$Gene <- rownames(expr_datas)
expr_datas <- pivot_longer(
  expr_datas,
  cols = -Gene, names_to = "Sample", values_to = "Expression"
)
expr_datas <- merge(expr_datas, pheno_data, by.x = "Sample", by.y = "geo_accession")

# Result rows ordered by adjusted p-value, limited to the first 429.
# head() never indexes past the end of the table, unlike `[1:429]`, which
# produces NA subscripts when fewer rows exist.
# NOTE(review): 429 is a hard-coded count, presumably tied to the size of `r`
# for this data set - confirm and derive it from the data if so.
expresse <- r[head(order(r$padj), 429), ]
# Join the statistics with the DEG expression values by row name (gene)
expresse <- merge(as.data.frame(expresse), expr_datasa, by = 0, sort = FALSE)
library(dplyr)
library(tibble)
library(clusterProfiler)
library(ggplot2)
# Build the table of differentially expressed genes (DEGs) with Entrez IDs.
#
# Reads:  r (DESeq2 results; row names are used as gene symbols)
# Writes: gene_symbols, gene_entrez_ids, de_genes
#         (de_genes columns: Symbol, log2FoldChange, padj, ENTREZID)

# Keep genes with adjusted p-value < 0.05 and |log2 fold change| > 1.
de_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) > 1)
# Convert to a plain data frame and keep only the two statistics used later.
# Base indexing is used instead of select() so the step does not depend on
# whether dplyr::select or AnnotationDbi::select is currently unmasked.
de_genes <- as.data.frame(de_genes)[, c("log2FoldChange", "padj"), drop = FALSE]
# Row names are assumed to be gene symbols.
gene_symbols <- rownames(de_genes)
# Convert gene symbols to Entrez IDs
gene_entrez_ids <- bitr(
  gene_symbols,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Hs.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
# Move the symbols from the row names into a regular column for the join.
de_genes$Symbol <- rownames(de_genes)
# Left join: every DEG is kept; symbols without an Entrez ID get ENTREZID = NA.
de_genes <- merge(
  de_genes, gene_entrez_ids,
  by.x = "Symbol", by.y = "SYMBOL", all.x = TRUE
)
# Keep one row per Entrez ID. duplicated() treats all NA values as equal, so
# rows with a missing Entrez ID are kept explicitly; otherwise every unmapped
# gene after the first would be dropped before the symbol-based STRING mapping.
de_genes <- de_genes[
  is.na(de_genes$ENTREZID) | !duplicated(de_genes$ENTREZID),
]
# Protein-protein interaction lookup for the DEGs via STRINGdb.
# STRING v12, human (NCBI taxon 9606), score threshold 400.
string_db <- STRINGdb$new(
  version = "12",
  species = 9606,
  score_threshold = 400,
  input_directory = ""
)
# Raise the timeout option to 10 minutes for the calls that follow.
options(timeout = 600)

# Attach a STRING identifier to every gene symbol; rows that cannot be mapped
# are removed from the result.
mapped_genes <- string_db$map(
  de_genes,
  "Symbol",
  removeUnmappedRows = TRUE
)

# Retrieve the interactions among the mapped proteins.
interactions <- string_db$get_interactions(mapped_genes$STRING_id)
# Visualise the network using igraph or other visualisation tools
library(igraph)
# Scan the promoters of the STRING-mapped DEGs for human JASPAR 2020 motifs.
#
# Reads:  mapped_genes (column Symbol)
# Writes: txdb, exons_by_gene, genes_info, transcripts_info, gene_entrez,
#         promoters_info, promoters, promoters_gr, motifs, motif_info,
#         motifHits, motif_counts

# Load the TxDb database
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
# Exons grouped by gene
exons_by_gene <- exonsBy(txdb, by = "gene")
# Gene coordinates, taken as the range spanned by each gene's exons
genes_info <- range(exons_by_gene)
# Transcript annotation (not used further in this step)
transcripts_info <- transcripts(txdb)
# Map the gene symbols to Entrez IDs; genes_info is filtered by its names
# against these IDs below.
gene_entrez <- mapIds(
  org.Hs.eg.db,
  keys = mapped_genes$Symbol,
  column = "ENTREZID",
  keytype = "SYMBOL",
  multiVals = "first"
)
## 'select()' returned 1:1 mapping between keys and columns
# Keep only the genes of interest
promoters_info <- subset(genes_info, names(genes_info) %in% gene_entrez)
# Promoter regions: 2 kb upstream of the range start, nothing downstream
promoters <- promoters(promoters_info, upstream = 2000, downstream = 0)
# Keep the standard chromosomes only
promoters <- keepStandardChromosomes(promoters, pruning.mode = "coarse")
# Clip regions that extend beyond the chromosome ends
promoters <- trim(promoters)
# Flatten the GRangesList of promoters into a single GRanges
promoters_gr <- unlist(promoters)

# Load the human motifs from the JASPAR 2020 database
motifs <- getMatrixSet(JASPAR2020, opts = list(species = "Homo sapiens"))
# Lookup table: JASPAR motif ID -> transcription factor name.
# vapply() guarantees a character vector whatever the size of the motif set.
motif_info <- data.frame(
  motif_id = names(motifs),
  tf_name = vapply(motifs, function(x) x@name, character(1)),
  stringsAsFactors = FALSE
)

# Match every motif against every promoter sequence
motifHits <- matchMotifs(
  motifs, promoters_gr,
  genome = BSgenome.Hsapiens.UCSC.hg38
)
# Per-promoter count = number of distinct motifs with at least one match in
# that promoter (row sums of the logical match matrix). countOverlaps() against
# motifHits is not suitable here: the ranges of motifHits are the promoters
# themselves, so it would only count each promoter's overlap with itself.
motif_counts <- rowSums(motifMatches(motifHits))
# Store the counts as a metadata column of the promoter ranges
promoters_gr$motif_counts <- motif_counts
# Summary of the per-promoter motif counts
summary(promoters_gr$motif_counts)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
# Rank JASPAR motifs (transcription factors) by the number of DEG promoters in
# which they have at least one match.
#
# Reads:  motifHits, motif_info
# Writes: motif_names, motif_data, tf_names, motif_counts, tf_names_length,
#         motif_counts_length, motif_data_dims, motif_summary, tf_summary,
#         top_tf_summary, motif1, motif2

motif_names <- names(motifHits)
# First assay of the matchMotifs() result: the motif-match matrix, with one
# row per promoter and one column per motif (see the dimensions printed below).
motif_data <- assays(motifHits)[[1]]
dim(motif_data) # promoters x motifs
## [1] 1 633
# Column names are the JASPAR motif IDs
tf_names <- colnames(motif_data)
# Per-motif count. This has to be a COLUMN sum: a row sum gives one value per
# promoter, which cannot be paired with motif IDs. Truncating one vector to the
# length of the other would attach promoter totals to unrelated motifs.
motif_counts <- colSums(motif_data)

# Diagnostics: both lengths must equal the number of motifs (columns)
tf_names_length <- length(tf_names)
motif_counts_length <- length(motif_counts)
cat("Length of tf_names:", tf_names_length, "\n")
cat("Length of motif_counts:", motif_counts_length, "\n")
motif_data_dims <- dim(motif_data)
cat("Dimensions of motif_data (rows, columns):", motif_data_dims, "\n")
## Dimensions of motif_data (rows, columns): 1 633
stopifnot(tf_names_length == motif_counts_length)

# One row per motif: JASPAR ID (column tf_name) and number of promoters hit
motif_summary <- data.frame(
  tf_name = tf_names,
  motif_count = unname(motif_counts),
  stringsAsFactors = FALSE
)
# Sort by count and keep the top motifs
tf_summary <- motif_summary[order(motif_summary$motif_count, decreasing = TRUE), ]
top_tf_summary <- head(tf_summary, 130) # Adjust the number as required.
# Keep the JASPAR ID in its own column; the merge below uses tf_name as key
top_tf_summary$motif_id <- top_tf_summary$tf_name
# Attach the transcription factor name; it arrives as column tf_name.y
motif1 <- merge(top_tf_summary, motif_info, by.x = "tf_name", by.y = "motif_id")
# merge() sorts by key, so restore the ranking by count
motif1 <- motif1[order(motif1$motif_count, decreasing = TRUE), ]
# Final table: JASPAR motif ID, TF name, number of promoters with a match
motif2 <- motif1[, c("motif_id", "tf_name.y", "motif_count")]
rownames(motif2) <- NULL
# View
print(motif2)
## motif_id tf_name.y motif_count
## 1 SPP1 FOXF2 99